1 Set Up

1.1 R Code

#packages we need for this code file
library(ggplot2)
library(mgcv)
library(lubridate)
library(zoo)
library(tidyverse)
library(dplyr)
library(DHARMa)
library(mgcViz)
library(extrafont)
library(arm)
loadfonts()
library(stargazer)
#define functions we will need for analysis
#expit function: inverse-logit, maps the real line to (0, 1)
#computed as 1/(1 + exp(-x)) instead of exp(x)/(1 + exp(x)): the latter
#overflows for large positive x (exp(x) -> Inf, giving Inf/Inf = NaN)
expit<-function(x){
  1/(1 + exp(-x))
}

#logit function: maps probabilities in (0, 1) to the real line
#(the inverse of expit; used later to put fitted values on the link scale)
logit <- function(x) {
  log(x/(1 - x))
}

1.2 Data

#read in the main analysis data set
main_analysis_data <- read.csv("./Data/full_data_set_9_6_21_unintentional.csv")

################################## set up data set ################################
#convert the intervention and time-period columns from character to Date
main_analysis_data$Intervention_First_Date <- as.Date(main_analysis_data$Intervention_First_Date)
main_analysis_data$Time_Period_Start <- as.Date(main_analysis_data$Time_Period_Start)
#rename the raw death-count column to reflect that the counts are imputed
colnames(main_analysis_data)[colnames(main_analysis_data) == "sum_deaths"] <- "imputed_deaths"

################################## set up the Regions ##############################
#four Census regions, built directly as a named list of state-name vectors
#(source: https://www.census.gov/geographies/reference-maps/2010/geo/2010-census-regions-and-divisions-of-the-united-states.html)
region.list <- list(
  Northeast = c("Connecticut", "Maine", "Massachusetts", "New Hampshire",
                "Rhode Island", "Vermont", "New Jersey", "New York",
                "Pennsylvania"),
  Midwest = c("Indiana", "Illinois", "Michigan", "Ohio", "Wisconsin",
              "Iowa", "Kansas", "Minnesota", "Missouri", "Nebraska",
              "North Dakota", "South Dakota"),
  South = c("Delaware", "District of Columbia", "Florida", "Georgia",
            "Maryland", "North Carolina", "South Carolina", "Virginia",
            "West Virginia", "Alabama", "Kentucky", "Mississippi",
            "Tennessee", "Arkansas", "Louisiana", "Oklahoma", "Texas"),
  West = c("Arizona", "Colorado", "Idaho", "New Mexico", "Montana",
           "Utah", "Nevada", "Wyoming", "Alaska", "California",
           "Hawaii", "Oregon", "Washington")
)

#assign each state to its Census region with vectorized membership tests;
#"West" is the default, so any state not matched below keeps that label
#(replaces the old per-state for-loop, which rescanned the full column
#once per state)
main_analysis_data$Region<-"West"
main_analysis_data$Region[main_analysis_data$State %in% region.list$Northeast]<-"Northeast"
main_analysis_data$Region[main_analysis_data$State %in% region.list$Midwest]<-"Midwest"
main_analysis_data$Region[main_analysis_data$State %in% region.list$South]<-"South"

2 Exploratory Data Analysis

2.1 Overdose Deaths

############################## EDA: Plot the Outcome and Intervention Trends ###############################
#read the yearly unintentional overdose death counts (tab-delimited export),
#then aggregate to national totals per year for 2000-2019
od_data_recent <- read.csv("./Data/unintentional_od_yearly_1999_2019_17_up.txt",
                           sep = "\t", stringsAsFactors = FALSE)
#coerce Deaths to numeric; non-numeric entries (presumably suppressed cells or
#annotations -- confirm against the raw file) become NA
od_data_recent$Deaths <- as.numeric(od_data_recent$Deaths)
od_data_recent <- od_data_recent %>%
  filter(!is.na(Year),             #drops trailing rows holding data set description info
         Year > 1999, Year < 2020) %>%
  group_by(Year) %>%
  summarise(sum_deaths = sum(Deaths, na.rm = TRUE))

# pdf("./Figures/total_od_deaths_all_paper_9_6_21_2000_2019.pdf")
#line-and-point plot of national yearly unintentional overdose deaths
#(uncomment the pdf()/dev.off() pair to write the figure to disk)
ggplot(data = od_data_recent, mapping = aes(x = Year, y = sum_deaths)) +
  geom_line() + 
  geom_point() +
  labs(x = "Year", y = "Yearly Number of Unintentional Drug Overdose Deaths in the 50 U.S. States") +
  #white background, axis lines only, Times fonts to match the paper figures
  theme(panel.background = element_rect("white"), 
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"),
        axis.title=element_text(family="Times", size=10, face="bold"),
        axis.text=element_text(family="Times",size=10)) +
  scale_x_continuous(breaks = seq(2000, 2020, by = 2)) +
  ylim(c(0, 62000))

# dev.off()

#national yearly totals: deaths, population, and death rates (as a proportion
#and per 100,000); population is summed over the two time periods within each
#year, hence the /2 to get a single yearly population -- confirm the
#two-periods-per-year structure against the data set
main_analysis_data_sum <- main_analysis_data %>% 
  group_by(year = year(Time_Period_Start)) %>%
  summarise(total_deaths = sum(imputed_deaths),
            sum_pop = sum(population)/2,
            total_prop = total_deaths/sum_pop,
            total_prop_by_100000 = 100000*total_deaths/sum_pop)
# %>%mutate(date = as.Date(as.yearmon(year)))

#percentage change in total deaths between the first (2000) and last (2019)
#year of the study period
death_2000 <- with(main_analysis_data_sum, total_deaths[year == 2000])
death_2019 <- with(main_analysis_data_sum, total_deaths[year == 2019])

((death_2019 - death_2000)/death_2000)*100
## [1] 435.5701

2.2 Intervention: DIH Prosecutions

#plot the number of states with an intervention at each time point
#build a data set with one row per distinct first-intervention date
int_dates <- main_analysis_data$Intervention_First_Date
num_states_with_intervention<-data.frame("Start_Date" =
                                    unique(int_dates[!is.na(int_dates)]))

#for each date, count the states whose first intervention occurred on or
#before it (vapply with a preallocated integer result replaces the old
#grow-by-append loop; NA intervention dates are excluded up front, which
#matches the old loop's post-hoc NA removal)
num_states_with_intervention$numStates <- vapply(
  num_states_with_intervention$Start_Date,
  function(d) {
    length(unique(main_analysis_data$State[!is.na(int_dates) & int_dates <= d]))
  },
  integer(1))

num_states_with_intervention$Start_Date <- as.Date(num_states_with_intervention$Start_Date)
#anchor the series at the study start (0 states) and study end (all adopters)
num_states_with_intervention <- rbind(data.frame("Start_Date" = c(as.Date("2000-01-01"),
                                                                  as.Date("2019-12-31")),
                                                 "numStates" = c(0, max(num_states_with_intervention$numStates))),
                                      num_states_with_intervention)
#lag the count so the series can be drawn as a step function
num_states_with_intervention <- num_states_with_intervention %>% 
  arrange(Start_Date) %>%
  mutate(lag_numStates = lag(numStates))

#interleave lagged and current counts into long format for plotting
num_states_with_intervention <- num_states_with_intervention %>%
  pivot_longer( c("lag_numStates", "numStates"), "numStates")

# pdf("Figures/num_states_with_intervention_9_6_21.pdf")
#step-style plot of the cumulative number of states with a DIH prosecution
#(uses the long-format lagged/current counts built above)
ggplot(num_states_with_intervention, aes(x = Start_Date, y = value, group = 1)) +
  geom_line() +
  # geom_point(num_states_with_intervention[num_states_with_intervention$numStates == "numStates",],
  #            mapping = aes(x = Start_Date, y = value, group = 1), size = 1) +
  labs(x = "Year", y = "Cumulative Number of States to have DIH Prosecutions") +
  #white background, axis lines only, Times fonts to match the paper figures
  theme(axis.text=element_text(family="Times",size=10),
        axis.title=element_text(family="Times", size=10, face="bold"),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"),
        axis.text.x = element_text(family="Times", size=10),
        panel.background = element_rect("white")) +
  scale_x_date(date_labels="%Y", breaks = seq(as.Date("2000-01-01"), as.Date("2018-01-01"), by = "2 years"))

# dev.off()

2.3 Policy Dates

#add the intervention variable as a measure of number of states with DIH prosecution
#(sums Intervention_Redefined across all states within each time period);
#ungroup afterwards so the Time_Period_Start grouping does not silently
#carry into later operations on main_analysis_data
main_analysis_data <- main_analysis_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention = sum(Intervention_Redefined)) %>%
  ungroup()

############################# Look at the policy dates #######################
#one row per state with the first (year-month) date of each policy;
#display names are assigned directly inside summarise rather than via a
#separate names()<- call afterwards
policy_dates <- main_analysis_data %>% 
  group_by(State) %>%
  summarise(`DIH Prosecutions` = unique(format(Intervention_First_Date, "%Y-%m")),
            `NAL: Pharmacists Yes` = unique(format(as.Date(Naloxone_Pharmacy_Yes_First_Date), "%Y-%m")),
            `NAL: Pharmacists No` = unique(format(as.Date(Naloxone_Pharmacy_No_First_Date), "%Y-%m")),
            `MML` = unique(format(as.Date(Medical_Marijuana_First_Date), "%Y-%m")),
            `RML` = unique(format(as.Date(Recreational_Marijuana_First_Date), "%Y-%m")),
            `PDMP` = unique(format(as.Date(PDMP_First_Date), "%Y-%m")),
            `GSL` = unique(format(as.Date(GSL_First_Date), "%Y-%m")),
            `Medicaid` = unique(format(as.Date(Medicaid_Expansion_First_Date), "%Y-%m")))
# write.csv(policy_dates, "./Data/policy_dates_9_6_21.csv")

3 Main Analysis: Effect of At Least One DIH Prosecution Report in Media on Unintentional Overdose Deaths

3.1 Analysis

############################## Run Model with Spline Time Effects by Region ###############################
#model that we will be using for the main analysis
#cr is used for cubic regression spline -- we are smoothing time effects by region
#run the analysis for all the states
#binomial GAM: the two-column response cbind(deaths, alive) supplies the
#successes/failures counts per state-period; State gives state fixed effects,
#each Census region gets its own smoothed time trend, and the remaining terms
#are 0/1-style policy indicators plus the running DIH-prosecution state count
main_analysis_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                           s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                           Naloxone_Pharmacy_Yes_Redefined +
                           Naloxone_Pharmacy_No_Redefined +
                           Medical_Marijuana_Redefined +
                           Recreational_Marijuana_Redefined +
                           GSL_Redefined +
                           PDMP_Redefined +
                           Medicaid_Expansion_Redefined +
                           Intervention_Redefined + 
                           num_states_w_intervention,
                         data = main_analysis_data, family = "binomial")

#summary output of the model (rendered as an HTML regression table)
stargazer(main_analysis_model, type = "html", dep.var.labels = c("Unintentional Overdose Deaths"))
Dependent variable:
Unintentional Overdose Deaths
StateAlaska 0.250***
(0.028)
StateArizona 0.305***
(0.014)
StateArkansas -0.396***
(0.020)
StateCalifornia -0.170***
(0.013)
StateColorado 0.092***
(0.016)
StateConnecticut 0.192***
(0.016)
StateDelaware 0.438***
(0.022)
StateFlorida 0.268***
(0.012)
StateGeorgia -0.087***
(0.013)
StateHawaii -0.217***
(0.026)
StateIdaho -0.159***
(0.024)
StateIllinois -0.022*
(0.013)
StateIndiana 0.079***
(0.014)
StateIowa -0.745***
(0.021)
StateKansas -0.343***
(0.019)
StateKentucky 0.641***
(0.014)
StateLouisiana 0.283***
(0.014)
StateMaine 0.167***
(0.022)
StateMaryland -1.069***
(0.019)
StateMassachusetts 0.221***
(0.014)
StateMichigan -0.020
(0.014)
StateMinnesota -0.626***
(0.017)
StateMississippi -0.110***
(0.018)
StateMissouri 0.190***
(0.015)
StateMontana -0.357***
(0.029)
StateNebraska -0.896***
(0.029)
StateNevada 0.432***
(0.017)
StateNew Hampshire 0.272***
(0.020)
StateNew Jersey 0.105***
(0.013)
StateNew Mexico 0.620***
(0.017)
StateNew York -0.237***
(0.013)
StateNorth Carolina 0.175***
(0.013)
StateNorth Dakota -1.054***
(0.045)
StateOhio 0.453***
(0.012)
StateOklahoma 0.377***
(0.015)
StateOregon -0.187***
(0.018)
StatePennsylvania 0.444***
(0.012)
StateRhode Island 0.255***
(0.022)
StateSouth Carolina 0.226***
(0.015)
StateSouth Dakota -0.970***
(0.043)
StateTennessee 0.437***
(0.013)
StateTexas -0.231***
(0.012)
StateUtah 0.012
(0.018)
StateVermont -0.146***
(0.031)
StateVirginia -0.111***
(0.014)
StateWashington 0.071***
(0.015)
StateWest Virginia 0.896***
(0.015)
StateWisconsin -0.038***
(0.015)
StateWyoming 0.018
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.025***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.007
(0.007)
Medical_Marijuana_Redefined 0.063***
(0.006)
Recreational_Marijuana_Redefined -0.037***
(0.009)
GSL_Redefined 0.036***
(0.006)
PDMP_Redefined -0.021***
(0.006)
Medicaid_Expansion_Redefined 0.100***
(0.006)
Intervention_Redefined 0.068***
(0.005)
num_states_w_intervention 0.014***
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -11.059***
(0.071)
Observations 2,000
Adjusted R2 0.914
Log Likelihood -16,716.690
UBRE 8.806
Note: *p<0.1; **p<0.05; ***p<0.01

3.2 Plots

#check diagnostics
#gam.check reports optimizer convergence and, for each smooth, whether the
#basis dimension k appears adequate (low p-value / k-index < 1 suggests not)
gam.check(main_analysis_model)

## 
## Method: UBRE   Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-1.448796e-06,6.372085e-05]
## (score 8.805847 & scale 1).
## Hessian positive definite, eigenvalue range [0.0001512652,0.0003395804].
## Model rank =  95 / 95 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                k'  edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest   9.00 8.86    1.05    0.99
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.93    1.05    1.00
## s(Time_Period_ID):as.factor(Region)South     9.00 8.90    1.05    0.99
## s(Time_Period_ID):as.factor(Region)West      9.00 8.57    1.05    0.99
main_analysis_model_object <- getViz(main_analysis_model)

#helper: build the smoothed time-effect plot for one region's spline term
#(replaces four copy-pasted plot blocks that differed only in the smooth
#index and the region name in the y-axis label)
#smooth_index follows mgcv's smooth ordering: 1 = Midwest, 2 = Northeast,
#3 = South, 4 = West (matching the gam.check output above)
plot_region_smooth <- function(smooth_index, region_name) {
  plot(sm(main_analysis_model_object, smooth_index)) +
    l_fitLine() +
    l_ciLine(mul = 5, linetype = 2) +
    theme_classic() +
    labs(x = "Time Period", y = paste("Smoothed Time Effects for", region_name)) +
    scale_x_continuous(breaks = c(1, 11, 21, 31),
                       labels = c("2000", "2005", "2010", "2015")) +
    theme(text = element_text(family = "Times", size = 10),
          title = element_text(family = "Times", size = 10, face = "bold"),
          panel.background = element_rect("white")) +
    ylim(c(-1, 1.2))
}

midwest_plot <- plot_region_smooth(1, "Midwest")
northeast_plot <- plot_region_smooth(2, "Northeast")
south_plot <- plot_region_smooth(3, "South")
west_plot <- plot_region_smooth(4, "West")

# pdf("./Figures/time_smoothed_effects_9_6_21.pdf")
gridPrint(midwest_plot, northeast_plot, south_plot, west_plot, ncol = 2)

# dev.off()

#yearly total population across states: population is constant within a
#state-year (unique() collapses the repeated per-period records), then summed
#NOTE: the result column keeps the default name `sum(pop)`, which the chunk
#below references as total_pop$`sum(pop)` -- keep that name if refactoring
total_pop <- main_analysis_data %>% 
  group_by(year = year(Time_Period_Start), State) %>% 
  summarise(pop = unique(population)) %>%
  group_by(year) %>% 
  summarise(sum(pop))

#national yearly death rate per 100,000: deaths scaled by 100,000 and divided
#by the matching year's total population
#NOTE(review): this relies on the summary here and total_pop having identical
#row order by year (both come out of group_by sorted ascending) -- a join on
#year would make the alignment explicit; confirm before reusing
main_analysis_data %>% 
  group_by(year(Time_Period_Start)) %>% 
  summarise(sum_deaths = sum(imputed_deaths)*100000) %>% 
  mutate(sum_deaths/total_pop$`sum(pop)`)
## # A tibble: 20 x 3
##    `year(Time_Period_Start)`  sum_deaths `sum_deaths/total_pop$\`sum(pop)\``
##  *                     <dbl>       <dbl>                               <dbl>
##  1                      2000 1151390000                                 2.35
##  2                      2001 1276465000                                 2.57
##  3                      2002 1614890000                                 3.22
##  4                      2003 1799140000.                                3.55
##  5                      2004 1953250000                                 3.82
##  6                      2005 2216225000                                 4.29
##  7                      2006 2603525000                                 4.99
##  8                      2007 2730800000                                 5.18
##  9                      2008 2787500000                                 5.23
## 10                      2009 2848100000                                 5.29
## 11                      2010 2969200000                                 5.48
## 12                      2011 3276800000                                 6.01
## 13                      2012 3291600000                                 5.98
## 14                      2013 3541000000                                 6.38
## 15                      2014 3847600000                                 6.88
## 16                      2015 4381900000                                 7.77
## 17                      2016 5433100000                                 9.56
## 18                      2017 6084700000                                10.6 
## 19                      2018 5847900000                                10.1 
## 20                      2019 6166500000                                10.6
#states with the smallest and largest change in the yearly death rate between
#the first and last year of the study period
main_analysis_data %>%
  group_by(State, year(Time_Period_Start)) %>%
  summarise(death_rate = (sum(imputed_deaths)/unique(population))*100000) %>%
  group_by(State) %>%
  #first()/last() generalize the old hard-coded death_rate[1]/death_rate[20],
  #which silently assumed exactly 20 years of data for every state
  summarise(first_death_rate = first(death_rate),
            last_death_rate = last(death_rate)) %>%
  mutate(range_death_rate = last_death_rate - first_death_rate) %>% 
  filter(range_death_rate == min(range_death_rate) | range_death_rate == max(range_death_rate))
## # A tibble: 2 x 4
##   State         first_death_rate last_death_rate range_death_rate
##   <chr>                    <dbl>           <dbl>            <dbl>
## 1 Nebraska                 0.705            3.61             2.90
## 2 West Virginia            1.74            25.6             23.9
# #summarize the DIH dates
# main_analysis_data %>% 
#   group_by(Time_Period_Start) %>%
#   summarise(prop_w_intervention = mean(Intervention_Redefined > 0)) %>%
#   View()

#create a data frame to compare the fitted probability of overdose death with
#the observed rate, as a goodness-of-fit visual check
#fitted(main_analysis_model) is already in the row order of main_analysis_data,
#so every column can be built directly -- the old per-state loop assigned the
#same values via an identity permutation and was O(states x rows)
main_analysis_plot_table<-data.frame(
  State = main_analysis_data$State,
  Fitted = unname(fitted(main_analysis_model)),
  Observed = main_analysis_data$imputed_deaths/main_analysis_data$population,
  Time = main_analysis_data$Time_Period_ID,
  Time_Date = main_analysis_data$Time_Period_Start,
  Intervention_Date = main_analysis_data$Intervention_First_Date)
#plot to compare the fitted values vs observed deaths
#one panel per state (free y-scale); pink = observed rate, blue = model
#estimate, both per 100,000; the vertical line marks the state's first DIH
#prosecution date (states with no prosecution get no line)
# pdf("./Figures/GAM_fitted_vs_actual_by_Region_9_6_21_with_int_date_full_data.pdf")
ggplot(data = main_analysis_plot_table, aes(x = Time_Date, y = Observed*100000, group = 1,
                                            color = "Observed")) +
  geom_line(aes(color = "Observed"))+ geom_point(aes(color = "Observed"), size = .5, alpha = .5) +
  geom_line(data = main_analysis_plot_table, aes(x = Time_Date, y = Fitted*100000, group = 1,
                                                 color = "Estimate")) +
  geom_point(data = main_analysis_plot_table, aes(x = Time_Date, y = Fitted*100000,
                                                  color = "Estimate"),
             size = .5, alpha = .5) +
  scale_color_manual(values = c("pink", "blue")) + 
  geom_vline(main_analysis_plot_table, mapping = aes(xintercept = Intervention_Date)) +
  facet_wrap(facets = vars(State), scales = "free_y", ncol = 5) +
  theme(axis.text.x = element_text(hjust = 1, size = 6, family = "Times"),
        axis.text.y = element_text(size = 6, family = "Times"),
        axis.title=element_text(size = 10,face = "bold", family = "Times"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        strip.text = element_text(size=8),
        panel.background = element_rect("white"),
        legend.position = "bottom") +
  labs(x = "Year", y = "Unintentional Drug Overdose Death Rates per 100,000 People",
       color = "")

# dev.off()
#diagnostic plots to check model
#deviance residuals plotted against logit() of the fitted probabilities
#(logit is defined at the top of this file)
residTab <- data.frame(logit_fitted_vals = logit(fitted(main_analysis_model)),
                       resids = residuals(main_analysis_model))
# pdf("./Figures/deviance_resids_9_6_21.pdf")
#NOTE(review): the x-axis label says "Logistic Function" but the values are on
#the logit (linear predictor) scale -- confirm the intended wording
ggplot(residTab, aes(x = logit_fitted_vals, y = resids)) +
  geom_point() +
  theme(text = element_text(size = 10, family = "Times"),
        title = element_text(size = 10, family = "Times", face = "bold"),
        panel.background = element_rect(fill = "white", color = "black")) +
  # theme_classic() +
  labs(x = "Logistic Function of Fitted Values", y = "Deviance Residuals")

# dev.off()

#binned residual plot (arm::binnedplot): average response-scale residual
#within bins of the predictor, with approximate +/-2 SE bands
pred_vals <- predict(main_analysis_model)   #default type = "link": linear predictor scale
resids <- resid(main_analysis_model, type = "response")
# pdf("./Figures/binned_resids_plot_9_6_21.pdf")
par(font.lab = 2)   #bold axis labels
par(family = "Times")
binnedplot(pred_vals, resids, main = "", 
           xlab = "Average Logistic Function of Fitted Values",
           ylab = "Average Residuals")

# dev.off()

3.3 Compile Results

############################## Main Analysis: Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table
#(one row per model coefficient; row names are the raw mgcv coefficient names)
main_analysis_full_table<-data.frame(coef(main_analysis_model))
#check to see how the table looks
head(main_analysis_full_table)
##                 coef.main_analysis_model.
## (Intercept)                  -11.05928955
## StateAlaska                    0.25032740
## StateArizona                   0.30457546
## StateArkansas                 -0.39568637
## StateCalifornia               -0.17047311
## StateColorado                  0.09249597
#rename the column to "Coefficient_Estimate"
colnames(main_analysis_full_table)<-c("Coefficient_Estimate")

#vector of the policy covariates in the model -- all use the "_Redefined"
#coding, so the suffix is appended once -- plus the running count of states
#with a DIH prosecution
covariates <- c(paste0(c("Naloxone_Pharmacy_Yes",
                         "Naloxone_Pharmacy_No",
                         "Medical_Marijuana",
                         "Recreational_Marijuana",
                         "GSL",
                         "PDMP",
                         "Medicaid_Expansion",
                         "Intervention"), "_Redefined"),
                "num_states_w_intervention")

#rename the regression output rows so that they look nicer
#currently there are 3 types of coefficients: state effects, the covariates,
#and smoothed time effects; rows named in `covariates` are left untouched
#(vectorized string operations replace the old row-by-row loop with its
#repeated substr() calls and per-iteration rownames<- assignments)
coef_names <- rownames(main_analysis_full_table)
is_covariate <- coef_names %in% covariates

#state effects look like "StateMassachusetts" -- drop the "State" prefix
state_rows <- !is_covariate & startsWith(coef_names, "State")
coef_names[state_rows] <- substring(coef_names[state_rows], 6)

#the intercept absorbs the baseline state, Alabama
coef_names[!is_covariate & coef_names == "(Intercept)"] <- "Intercept/Alabama"

#smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1";
#replace the machine prefix with a readable one, keeping the region + index
smooth_prefix <- "s(Time_Period_ID):as.factor(Region)"
smooth_rows <- !is_covariate & startsWith(coef_names, smooth_prefix)
coef_names[smooth_rows] <- paste0("Smoothed Time for Region ",
                                  substring(coef_names[smooth_rows],
                                            nchar(smooth_prefix) + 1))

rownames(main_analysis_full_table) <- coef_names

#confidence intervals, odds ratios, and p-values for the coefficient table
#summary.gam() is expensive for a model this size, so compute it once and
#reuse it (the original called summary() four separate times)
main_analysis_model_summary <- summary(main_analysis_model)

#Wald 95% confidence intervals on the coefficient (log-odds) scale
main_analysis_full_table$Coefficient_Lower_Bound<-main_analysis_full_table$Coefficient_Estimate - 
  1.96*main_analysis_model_summary$se
main_analysis_full_table$Coefficient_Upper_Bound<-main_analysis_full_table$Coefficient_Estimate + 
  1.96*main_analysis_model_summary$se

#impute the estimates and confidence intervals in the odds ratio scale
main_analysis_full_table$Odds_Ratio<-exp(main_analysis_full_table$Coefficient_Estimate)
main_analysis_full_table$Odds_Ratio_LB<-exp(main_analysis_full_table$Coefficient_Lower_Bound)
main_analysis_full_table$Odds_Ratio_UB<-exp(main_analysis_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
main_analysis_full_table$Standard_Error<-main_analysis_model_summary$se
#p-values exist only for the parametric terms (p.pv); the smoothed time-effect
#coefficients at the bottom of the table get NA
main_analysis_full_table$p_value<-c(main_analysis_model_summary$p.pv, 
                                    rep(NA, length(coef(main_analysis_model)) - 
                                          length(main_analysis_model_summary$p.pv)))

#inspect the top of the finished table (intercept and state fixed effects)
head(main_analysis_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama         -11.05928955            -11.19842493
## Alaska                      0.25032740              0.19627234
## Arizona                     0.30457546              0.27724686
## Arkansas                   -0.39568637             -0.43420110
## California                 -0.17047311             -0.19647490
## Colorado                    0.09249597              0.06106175
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama             -10.9201542 1.574025e-05  1.369575e-05
## Alaska                          0.3043825 1.284446e+00  1.216858e+00
## Arizona                         0.3319041 1.356049e+00  1.319492e+00
## Arkansas                       -0.3571716 6.732178e-01  6.477820e-01
## California                     -0.1444713 8.432658e-01  8.216220e-01
## Colorado                        0.1239302 1.096909e+00  1.062965e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  1.808995e-05     0.07098744  0.000000e+00
## Alaska             1.355787e+00     0.02757911  1.119133e-19
## Arizona            1.393619e+00     0.01394317 8.851746e-106
## Arkansas           6.996524e-01     0.01965037  3.546730e-90
## California         8.654797e-01     0.01326622  8.583035e-38
## Colorado           1.131937e+00     0.01603787  8.052817e-09
#inspect the bottom of the table (smoothed time effects; p_value is NA there)
tail(main_analysis_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4           0.17071662              0.13483168
## Smoothed Time for Region West.5           0.04607653             -0.02157082
## Smoothed Time for Region West.6          -0.03846207             -0.13394493
## Smoothed Time for Region West.7          -0.03151909             -0.14422243
## Smoothed Time for Region West.8           0.01989377             -0.11800391
## Smoothed Time for Region West.9           0.18842096              0.07222067
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4              0.20660157  1.1861546
## Smoothed Time for Region West.5              0.11372387  1.0471545
## Smoothed Time for Region West.6              0.05702078  0.9622682
## Smoothed Time for Region West.7              0.08118425  0.9689725
## Smoothed Time for Region West.8              0.15779145  1.0200930
## Smoothed Time for Region West.9              0.30462125  1.2073417
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4     1.1443441      1.229493     0.01830865
## Smoothed Time for Region West.5     0.9786602      1.120443     0.03451395
## Smoothed Time for Region West.6     0.8746382      1.058678     0.04871574
## Smoothed Time for Region West.7     0.8656952      1.084571     0.05750171
## Smoothed Time for Region West.8     0.8886926      1.170922     0.07035596
## Smoothed Time for Region West.9     1.0748925      1.356111     0.05928586
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#save the table into a CSV
# write.csv(round(main_analysis_full_table,5), "./Data/coefficients_GAM_9_6_21_full_data_uninentional_od.csv")

#export a table with just the covariates
#first, locate the rows holding the policy covariates and round them to 5 decimals
covariate_Index <- which(rownames(main_analysis_full_table) %in% covariates)
main_analysis_covariate_table <- round(main_analysis_full_table[covariate_Index, ], 5)

#cleaner display names for the covariate rows
#NOTE(review): this assumes the covariate rows appear in the same order as the
#`covariates` vector (they do, given the model formula's term order)
rownames(main_analysis_covariate_table) <- c("Naloxone_Pharmacy_Yes",
                                             "Naloxone_Pharmacy_No",
                                             "Medical_Marijuana",
                                             "Recreational_Marijuana",
                                             "GSL",
                                             "PDMP",
                                             "Medicaid_Expansion",
                                             "Intervention",
                                             "Number of States with DIH Prosecutions")

#reorganize so the covariates sit on top and the remaining variables are below
main_analysis_covariate_table <- rbind(main_analysis_covariate_table,
                                       main_analysis_full_table[-covariate_Index, ])
#keep only the odds-ratio-scale columns and the p-value
main_analysis_covariate_table <- main_analysis_covariate_table[, setdiff(colnames(main_analysis_covariate_table),
                                                                         c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                                                                           "Coefficient_Upper_Bound", "Standard_Error"))]

colnames(main_analysis_covariate_table) <- c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(main_analysis_covariate_table, 10)
##                                        Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                          9.749800e-01 9.602700e-01
## Naloxone_Pharmacy_No                           1.006720e+00 9.939600e-01
## Medical_Marijuana                              1.065370e+00 1.053630e+00
## Recreational_Marijuana                         9.637100e-01 9.475300e-01
## GSL                                            1.036210e+00 1.024840e+00
## PDMP                                           9.793700e-01 9.682900e-01
## Medicaid_Expansion                             1.105550e+00 1.092500e+00
## Intervention                                   1.070800e+00 1.059460e+00
## Number of States with DIH Prosecutions         1.014340e+00 1.009550e+00
## Intercept/Alabama                              1.574025e-05 1.369575e-05
##                                         RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes                  9.899100e-01 0.00108
## Naloxone_Pharmacy_No                   1.019660e+00 0.30344
## Medical_Marijuana                      1.077240e+00 0.00000
## Recreational_Marijuana                 9.801500e-01 0.00002
## GSL                                    1.047710e+00 0.00000
## PDMP                                   9.905900e-01 0.00033
## Medicaid_Expansion                     1.118750e+00 0.00000
## Intervention                           1.082260e+00 0.00000
## Number of States with DIH Prosecutions 1.019150e+00 0.00000
## Intercept/Alabama                      1.808995e-05 0.00000
#save the table into a CSV
# write.csv(round(main_analysis_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_unintentional_od.csv")

3.4 Attributable Deaths

############################## Main Analysis: Number of Overdose Deaths Attributed to Intervention ###############################
#find the number of deaths attributable to the intervention
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_main_analysis<-main_analysis_data[which(main_analysis_data$Intervention_Redefined>0),]

#compute the probability of overdose had intervention not occurred:
#on the logit scale, subtract the estimated intervention effect from the observed death probability
prob_od_no_int_main_analysis<-expit(-coef(main_analysis_model)["Intervention_Redefined"]*attr_deaths_anlys_main_analysis$Intervention_Redefined
                                    + logit(attr_deaths_anlys_main_analysis$imputed_deaths/attr_deaths_anlys_main_analysis$population))

#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
#(Wald intervals: estimate +/- 1.96*SE; assumes coef() and summary()$se are aligned in the same order)
coef_lb<-coef(main_analysis_model) - 1.96*summary(main_analysis_model)$se
coef_ub<-coef(main_analysis_model) + 1.96*summary(main_analysis_model)$se

#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable
#(a LARGER intervention coefficient gives a SMALLER counterfactual probability, so coef_lb/coef_ub
#correspond to the lower/upper bounds of the attributable-death counts computed below)
prob_od_no_int_LB_main_analysis<-expit(-coef_lb[names(coef_lb) == "Intervention_Redefined"]*attr_deaths_anlys_main_analysis$Intervention_Redefined
                                       + logit(attr_deaths_anlys_main_analysis$imputed_deaths/attr_deaths_anlys_main_analysis$population))

prob_od_no_int_UB_main_analysis<-expit(-coef_ub[names(coef_ub) == "Intervention_Redefined"]*attr_deaths_anlys_main_analysis$Intervention_Redefined
                                       + logit(attr_deaths_anlys_main_analysis$imputed_deaths/attr_deaths_anlys_main_analysis$population))


#estimate the number of deaths attributable to the intervention
#first, preallocate the vectors to store the numbers, one entry per time period
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(unique(attr_deaths_anlys_main_analysis$Time_Period_ID)))


#for each time period, we first find the indices of rows containing data from that time point
#then, we find the total number of deaths attributable to the intervention

index<-1 #keep track of where to store the values in the vector

for(time in sort(unique(attr_deaths_anlys_main_analysis$Time_Period_ID))){
  #find the indices of rows where the time point = time
  time_point_index<-which(attr_deaths_anlys_main_analysis$Time_Period_ID == time)

  #find the number of deaths attributable to intervention = observed number of deaths with intervention - estimated number of deaths had intervention not occurred
  num_attr_od[index]<-sum(attr_deaths_anlys_main_analysis$imputed_deaths[time_point_index]
                          - prob_od_no_int_main_analysis[time_point_index]*attr_deaths_anlys_main_analysis$population[time_point_index])
  #find the lower and upper bounds of the estimated number of deaths attributable to the intervention
  num_attr_od_LB[index]<-sum(attr_deaths_anlys_main_analysis$imputed_deaths[time_point_index]
                             - prob_od_no_int_LB_main_analysis[time_point_index]*attr_deaths_anlys_main_analysis$population[time_point_index])
  num_attr_od_UB[index]<-sum(attr_deaths_anlys_main_analysis$imputed_deaths[time_point_index]
                             - prob_od_no_int_UB_main_analysis[time_point_index]*attr_deaths_anlys_main_analysis$population[time_point_index])
  index<-index + 1
}

#NOTE(review): pairing Time_Period_ID and Time_Period_Start by sorting each column independently
#assumes the two sort in the same order (i.e., IDs are chronological) -- TODO confirm upstream
num_attr_od_main_analysis<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_main_analysis$Time_Period_ID)),
                                      "Time_Start" = sort(unique(attr_deaths_anlys_main_analysis$Time_Period_Start)),
                                      "Num_Attr_Deaths" = num_attr_od,
                                      "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                      "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_main_analysis$Num_Attr_Deaths)
## [1] 36888.23
summary(num_attr_od_main_analysis$Num_Attr_Deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.01  412.33  774.20  922.21 1273.36 2130.14
#convert the time column back to Date class for the yearly grouping below
num_attr_od_main_analysis$Time_Start<-as.Date(num_attr_od_main_analysis$Time_Start)

#compute the 95% CI for the total
sum(num_attr_od_main_analysis$Num_Attr_Deaths_LB)
## [1] 31311.05
sum(num_attr_od_main_analysis$Num_Attr_Deaths_UB)
## [1] 42406.57
#sum up the number of excess deaths per year
yearly_num_Attr_Deaths_main_analysis<-num_attr_od_main_analysis %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

summary(yearly_num_Attr_Deaths_main_analysis$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    32.9   871.7  1554.5  1844.4  2543.8  4063.3
summary(yearly_num_Attr_Deaths_main_analysis$death_lb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    27.9   739.9  1319.5  1565.6  2159.2  3449.1
summary(yearly_num_Attr_Deaths_main_analysis$death_ub)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   37.85 1002.22 1787.03 2120.33 2924.32 4671.10

4 Secondary Analysis: Effect of At Least One DIH Prosecution Report in Media on All Overdose Deaths

4.1 Analysis

############################## Secondary Analysis: All Overdose Deaths ###############################
#read in the all-overdose data set and apply the same column set-up as the main analysis
od_all_data <- read.csv("./Data/full_data_set_9_7_21_all_od.csv")
colnames(od_all_data)[which(colnames(od_all_data) == "sum_deaths")] <- "imputed_deaths"
od_all_data$Time_Period_Start <- as.Date(od_all_data$Time_Period_Start)
od_all_data$Intervention_First_Date <- as.Date(od_all_data$Intervention_First_Date)

#assign each state its Census region
#initialize vector with "West" and then impute the other regions for the states
od_all_data$Region<-rep("West", nrow(od_all_data))
for(state in unique(od_all_data$State)){
  if(state %in% region.list$Northeast){
    od_all_data$Region[od_all_data$State == state]<-"Northeast"
  }else if(state %in% region.list$Midwest){
    od_all_data$Region[od_all_data$State == state]<-"Midwest"
  }else if(state %in% region.list$South){
    od_all_data$Region[od_all_data$State == state]<-"South"
  }
}

#compute the number of states with intervention at each time period by adding up the intervention variable
#ungroup() afterwards so the grouping does not silently carry over into later dplyr operations on od_all_data
od_all_data <- od_all_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention = sum(Intervention_Redefined)) %>%
  ungroup()

#model that we will be using for the secondary analysis
#cr is used for cubic regression spline -- we are smoothing time effects by region
#binomial GAM on (deaths, survivors) counts with state fixed effects plus policy covariates
od_all_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                    s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                    Naloxone_Pharmacy_Yes_Redefined +
                    Naloxone_Pharmacy_No_Redefined +
                    Medical_Marijuana_Redefined +
                    Recreational_Marijuana_Redefined +
                    GSL_Redefined +
                    PDMP_Redefined +
                    Medicaid_Expansion_Redefined +
                    Intervention_Redefined + 
                    num_states_w_intervention,
                  data = od_all_data, family = "binomial")

#summary output of the model
stargazer(od_all_model, type = "html", dep.var.labels = "All Overdose Deaths")
Dependent variable:
All Overdose Deaths
StateAlaska 0.223***
(0.024)
StateArizona 0.321***
(0.012)
StateArkansas -0.123***
(0.016)
StateCalifornia -0.143***
(0.011)
StateColorado 0.163***
(0.014)
StateConnecticut 0.134***
(0.014)
StateDelaware 0.367***
(0.020)
StateFlorida 0.256***
(0.010)
StateGeorgia -0.123***
(0.012)
StateHawaii -0.107***
(0.021)
StateIdaho -0.028
(0.019)
StateIllinois -0.091***
(0.011)
StateIndiana 0.135***
(0.012)
StateIowa -0.590***
(0.018)
StateKansas -0.205***
(0.016)
StateKentucky 0.519***
(0.012)
StateLouisiana 0.271***
(0.012)
StateMaine 0.152***
(0.019)
StateMaryland 0.371***
(0.012)
StateMassachusetts 0.307***
(0.012)
StateMichigan 0.259***
(0.011)
StateMinnesota -0.455***
(0.015)
StateMississippi -0.145***
(0.016)
StateMissouri 0.201***
(0.013)
StateMontana -0.020
(0.023)
StateNebraska -0.630***
(0.023)
StateNevada 0.426***
(0.014)
StateNew Hampshire 0.257***
(0.018)
StateNew Jersey 0.046***
(0.012)
StateNew Mexico 0.561***
(0.015)
StateNew York -0.205***
(0.011)
StateNorth Carolina 0.125***
(0.011)
StateNorth Dakota -0.875***
(0.037)
StateOhio 0.374***
(0.011)
StateOklahoma 0.307***
(0.013)
StateOregon 0.109***
(0.014)
StatePennsylvania 0.364***
(0.011)
StateRhode Island 0.319***
(0.019)
StateSouth Carolina 0.126***
(0.013)
StateSouth Dakota -0.694***
(0.033)
StateTennessee 0.396***
(0.011)
StateTexas -0.249***
(0.011)
StateUtah 0.457***
(0.014)
StateVermont -0.021
(0.026)
StateVirginia -0.146***
(0.012)
StateWashington 0.131***
(0.013)
StateWest Virginia 0.770***
(0.014)
StateWisconsin -0.016
(0.013)
StateWyoming 0.061**
(0.028)
Naloxone_Pharmacy_Yes_Redefined 0.009
(0.007)
Naloxone_Pharmacy_No_Redefined 0.005
(0.006)
Medical_Marijuana_Redefined 0.041***
(0.005)
Recreational_Marijuana_Redefined -0.065***
(0.008)
GSL_Redefined 0.011**
(0.005)
PDMP_Redefined 0.016***
(0.005)
Medicaid_Expansion_Redefined 0.112***
(0.005)
Intervention_Redefined 0.020***
(0.005)
num_states_w_intervention 0.014***
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -10.737***
(0.060)
Observations 2,000
Adjusted R2 0.900
Log Likelihood -15,832.420
UBRE 7.507
Note: *p<0.1; **p<0.05; ***p<0.01
# gam.check(od_all_model)

4.2 Plots

#create a data frame to compare fitted vs observed overdose-death probabilities
#fitted(od_all_model) is row-aligned with od_all_data (gam fitted values follow the data order),
#so the columns can be filled directly -- the original per-state loop was an identity fill that
#also re-computed fitted(od_all_model) on every iteration
od_all_plot_table<-data.frame(State = od_all_data$State,
                              Fitted = as.numeric(fitted(od_all_model)),
                              Observed = od_all_data$imputed_deaths/od_all_data$population,
                              Time = od_all_data$Time_Period_ID,
                              Time_Date = od_all_data$Time_Period_Start,
                              Intervention_Date = od_all_data$Intervention_First_Date)

#plot to compare the fitted values vs observed deaths (rates per 100,000), one panel per state,
#with a vertical line at each state's first intervention date
ggplot(data = od_all_plot_table) + 
  geom_point(aes(x = Time_Date, y = Observed*100000, group = 1, color = "Observed"), size = 0.5, alpha = 0.5) +
  geom_line(aes(x = Time_Date, y = Observed*100000, group = 1, color = "Observed"))+
  geom_point(data = od_all_plot_table, aes(x = Time_Date, y = Fitted*100000, group = 1, col = "Estimate"), size = 0.5, alpha = 0.5) + 
  geom_line(data = od_all_plot_table, aes(x = Time_Date, y = Fitted*100000, group = 1, col = "Estimate")) +
  geom_vline(od_all_plot_table, mapping = aes(xintercept = Intervention_Date)) +
  facet_wrap(facets = vars(State)) +
  scale_color_manual(values = c("pink", "blue")) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7), axis.text.y = element_text(size = 7),
        axis.title=element_text(size = 15,face = "bold"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        strip.text = element_text(size=8),panel.background = element_rect("white"),
        legend.position = "bottom") +
  labs(x = "Year", y = "Drug Overdose Death Rates per 100,000 People", color = "")

#plot to show the time smooth effects by region
plot(od_all_model, ylab = "Smoothed Time Effects", xlab = "Time", pages = 1)

#diagnostic plots to check model
gam.check(od_all_model)

## 
## Method: UBRE   Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-6.991359e-08,1.968549e-06]
## (score 7.507021 & scale 1).
## Hessian positive definite, eigenvalue range [3.584438e-05,0.000246841].
## Model rank =  95 / 95 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                k'  edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest   9.00 8.86    1.04    0.98
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.97    1.04    0.97
## s(Time_Period_ID):as.factor(Region)South     9.00 8.89    1.04    0.95
## s(Time_Period_ID):as.factor(Region)West      9.00 8.70    1.04    0.97
#deviance residuals against the fitted values on the logit (linear-predictor) scale
fitted_logits<-logit(fitted(od_all_model))
plot(fitted_logits, residuals(od_all_model),
     ylab = "Deviance Residuals", xlab = "Logit(Fitted Values)")

4.3 Compile Results

############################## Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table (one row per model coefficient, rownames = coefficient names)
od_all_full_table<-data.frame(coef(od_all_model))
#check to see how the table looks
head(od_all_full_table)
##                 coef.od_all_model.
## (Intercept)            -10.7371006
## StateAlaska              0.2226814
## StateArizona             0.3208871
## StateArkansas           -0.1230679
## StateCalifornia         -0.1431041
## StateColorado            0.1629195
#rename the column to "Coefficient_Estimate"
colnames(od_all_full_table)<-c("Coefficient_Estimate")

#vector of the policy-covariate coefficient names; these rows keep their model names
#during the renaming loop below and are pulled out for the covariate table later
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", 
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined", 
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined", 
              "Intervention_Redefined", 
              "num_states_w_intervention")
#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#the smooth-term prefix produced by mgcv, e.g. "s(Time_Period_ID):as.factor(Region)West.1"
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"

#for each row in the coefficient table (seq_along is safe even for zero rows,
#unlike 1:length(...))
for(i in seq_along(rownames(od_all_full_table))){

  current_name<-rownames(od_all_full_table)[i]

  #covariates keep their names as-is
  if(!(current_name %in% covariates)){

    if(startsWith(current_name, "State")){

      #state effects look like "StateMassachusetts" or "StateGeorgia":
      #strip the "State" prefix and keep just the state name
      rownames(od_all_full_table)[i]<-substr(current_name, start = 6,
                                             stop = nchar(current_name))

    }else if(current_name == "(Intercept)"){

      #the intercept is the baseline state (Alabama), so rename it to make that explicit
      rownames(od_all_full_table)[i]<-"Intercept/Alabama"

    }else if(startsWith(current_name, smooth_prefix)){

      #smoothed time effects: replace the mgcv prefix with a readable label,
      #keeping the region + basis index suffix (e.g. "West.1")
      rownames(od_all_full_table)[i]<-paste("Smoothed Time for Region ",
                                            substr(current_name, start = nchar(smooth_prefix) + 1,
                                                   stop = nchar(current_name)),
                                            sep = "")

    }
  }
}

#confidence intervals for the coefficients (Wald: estimate +/- 1.96*SE;
#assumes summary()$se is aligned with the rows of the table, i.e. with coef() order)
od_all_full_table$Coefficient_Lower_Bound<-od_all_full_table$Coefficient_Estimate - 1.96*summary(od_all_model)$se
od_all_full_table$Coefficient_Upper_Bound<-od_all_full_table$Coefficient_Estimate + 1.96*summary(od_all_model)$se

#impute the estimates and confidence intervals in the odds ratio scale
#(exponentiate the logit-scale estimate and bounds)
od_all_full_table$Odds_Ratio<-exp(od_all_full_table$Coefficient_Estimate)
od_all_full_table$Odds_Ratio_LB<-exp(od_all_full_table$Coefficient_Lower_Bound)
od_all_full_table$Odds_Ratio_UB<-exp(od_all_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
od_all_full_table$Standard_Error<-summary(od_all_model)$se
#note that there is no p-value for the smoothed time effects, so we put a NA for those rows
#NOTE(review): this padding assumes the parametric coefficients precede the smooth-term
#coefficients in coef() order -- TODO confirm against mgcv's coefficient ordering
od_all_full_table$p_value<-c(summary(od_all_model)$p.pv, 
                             rep(NA, length(coef(od_all_model)) - 
                                   length(summary(od_all_model)$p.pv)))

head(od_all_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama          -10.7371006             -10.8556442
## Alaska                       0.2226814               0.1749174
## Arizona                      0.3208871               0.2973683
## Arkansas                    -0.1230679              -0.1537843
## California                  -0.1431041              -0.1655486
## Colorado                     0.1629195               0.1359987
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama            -10.61855696 2.172383e-05  1.929539e-05
## Alaska                         0.27044533 1.249422e+00  1.191148e+00
## Arizona                        0.34440588 1.378350e+00  1.346311e+00
## Arkansas                      -0.09235162 8.842036e-01  8.574570e-01
## California                    -0.12065954 8.666638e-01  8.474286e-01
## Colorado                       0.18984025 1.176942e+00  1.145680e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  2.445791e-05     0.06048143  0.000000e+00
## Alaska             1.310548e+00     0.02436937  6.376260e-20
## Arizona            1.411151e+00     0.01199937 1.530388e-157
## Arkansas           9.117845e-01     0.01567159  4.064260e-15
## California         8.863357e-01     0.01145130  7.776242e-36
## Colorado           1.209056e+00     0.01373510  1.874870e-32
tail(od_all_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4          0.123135742              0.09293469
## Smoothed Time for Region West.5         -0.008299928             -0.06586925
## Smoothed Time for Region West.6         -0.085185636             -0.16641537
## Smoothed Time for Region West.7         -0.120884114             -0.21682692
## Smoothed Time for Region West.8         -0.091545818             -0.20901973
## Smoothed Time for Region West.9          0.044134671             -0.05495089
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4             0.153336799  1.1310379
## Smoothed Time for Region West.5             0.049269391  0.9917344
## Smoothed Time for Region West.6            -0.003955896  0.9183418
## Smoothed Time for Region West.7            -0.024941308  0.8861366
## Smoothed Time for Region West.8             0.025928091  0.9125195
## Smoothed Time for Region West.9             0.143220234  1.0451231
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4     1.0973901     1.1657175     0.01540870
## Smoothed Time for Region West.5     0.9362533     1.0505033     0.02937210
## Smoothed Time for Region West.6     0.8466945     0.9960519     0.04144374
## Smoothed Time for Region West.7     0.8050693     0.9753672     0.04895041
## Smoothed Time for Region West.8     0.8113792     1.0262671     0.05993567
## Smoothed Time for Region West.9     0.9465316     1.1539839     0.05055386
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#save the table into a CSV
# write.csv(round(od_all_full_table,5), "./Data/coefficients_GAM_9_1_20_full_data_all_od.csv")

#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(od_all_full_table) %in% covariates)
od_all_covariate_table<-(round(od_all_full_table[covariate_Index,], 5))

#rename the variables so that the table looks cleaner
#(order must match the order of `covariates` rows selected above)
rownames(od_all_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                    "Naloxone_Pharmacy_No",
                                    "Medical_Marijuana",
                                    "Recreational_Marijuana",
                                    "GSL", 
                                    "PDMP", 
                                    "Medicaid_Expansion",
                                    "Intervention", 
                                    "Number of States with DIH Prosecution")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
od_all_covariate_table<-rbind(od_all_covariate_table, od_all_full_table[-covariate_Index,])
#drop the columns that aren't on the odds-ratio scale
#NOTE: a logical mask is safer than -which(...): if none of the names matched,
#-which(...) would be -integer(0), which silently selects ZERO columns
keep_cols<-!(colnames(od_all_covariate_table) %in%
               c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))
od_all_covariate_table<-od_all_covariate_table[, keep_cols]

colnames(od_all_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(od_all_covariate_table, 11)
##                                       Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                         1.008540e+00 9.950000e-01
## Naloxone_Pharmacy_No                          1.004760e+00 9.937600e-01
## Medical_Marijuana                             1.042010e+00 1.032090e+00
## Recreational_Marijuana                        9.371300e-01 9.234400e-01
## GSL                                           1.011500e+00 1.001890e+00
## PDMP                                          1.015620e+00 1.006110e+00
## Medicaid_Expansion                            1.118740e+00 1.107440e+00
## Intervention                                  1.019820e+00 1.010750e+00
## Number of States with DIH Prosecution         1.013820e+00 1.009740e+00
## Intercept/Alabama                             2.172383e-05 1.929539e-05
## Alaska                                        1.249422e+00 1.191148e+00
##                                        RR_95_CI_UB     p-value
## Naloxone_Pharmacy_Yes                 1.022250e+00 2.17470e-01
## Naloxone_Pharmacy_No                  1.015880e+00 3.97430e-01
## Medical_Marijuana                     1.052020e+00 0.00000e+00
## Recreational_Marijuana                9.510200e-01 0.00000e+00
## GSL                                   1.021190e+00 1.88600e-02
## PDMP                                  1.025220e+00 1.24000e-03
## Medicaid_Expansion                    1.130140e+00 0.00000e+00
## Intervention                          1.028970e+00 2.00000e-05
## Number of States with DIH Prosecution 1.017910e+00 0.00000e+00
## Intercept/Alabama                     2.445791e-05 0.00000e+00
## Alaska                                1.310548e+00 6.37626e-20
#save the table into a CSV
# write.csv(round(od_all_covariate_table, 5), "./Data/coefficients_covariates_9_6_21_full_data_all_od.csv")

4.4 Attributable Deaths

############################## Number of Overdose Deaths Attributed to Intervention ###############################
#find the number of deaths attributable to the intervention
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_od_all<-od_all_data[which(od_all_data$Intervention_Redefined>0),]

#compute the probability of overdose had intervention not occurred:
#on the logit scale, subtract the estimated intervention effect from the observed death probability
prob_od_no_int_od_all<-expit(-coef(od_all_model)["Intervention_Redefined"]*attr_deaths_anlys_od_all$Intervention_Redefined
                             + logit(attr_deaths_anlys_od_all$imputed_deaths/attr_deaths_anlys_od_all$population))

#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
#(Wald intervals: estimate +/- 1.96*SE; assumes coef() and summary()$se are aligned in the same order)
coef_lb<-coef(od_all_model) - 1.96*summary(od_all_model)$se
coef_ub<-coef(od_all_model) + 1.96*summary(od_all_model)$se

#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable
#(a LARGER intervention coefficient gives a SMALLER counterfactual probability, so coef_lb/coef_ub
#correspond to the lower/upper bounds of the attributable-death counts computed below)
prob_od_no_int_LB_od_all<-expit(-coef_lb[names(coef_lb) == "Intervention_Redefined"]*attr_deaths_anlys_od_all$Intervention_Redefined
                                + logit(attr_deaths_anlys_od_all$imputed_deaths/attr_deaths_anlys_od_all$population))

prob_od_no_int_UB_od_all<-expit(-coef_ub[names(coef_ub) == "Intervention_Redefined"]*attr_deaths_anlys_od_all$Intervention_Redefined
                                + logit(attr_deaths_anlys_od_all$imputed_deaths/attr_deaths_anlys_od_all$population))


#estimate the number of deaths attributable to the intervention
#first, preallocate the vectors to store the numbers, one entry per time period
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(unique(attr_deaths_anlys_od_all$Time_Period_ID)))


#for each time period, we first find the indices of rows containing data from that time point
#then, we find the total number of deaths attributable to the intervention

index<-1 #keep track of where to store the values in the vector

for(time in sort(unique(attr_deaths_anlys_od_all$Time_Period_ID))){
  #find the indices of rows where the time point = time
  time_point_index<-which(attr_deaths_anlys_od_all$Time_Period_ID == time)

  #find the number of deaths attributable to intervention = observed number of deaths with intervention - estimated number of deaths had intervention not occurred
  num_attr_od[index]<-sum(attr_deaths_anlys_od_all$imputed_deaths[time_point_index]
                          - prob_od_no_int_od_all[time_point_index]*attr_deaths_anlys_od_all$population[time_point_index])
  #find the lower and upper bounds of the estimated number of deaths attributable to the intervention
  num_attr_od_LB[index]<-sum(attr_deaths_anlys_od_all$imputed_deaths[time_point_index]
                             - prob_od_no_int_LB_od_all[time_point_index]*attr_deaths_anlys_od_all$population[time_point_index])
  num_attr_od_UB[index]<-sum(attr_deaths_anlys_od_all$imputed_deaths[time_point_index]
                             - prob_od_no_int_UB_od_all[time_point_index]*attr_deaths_anlys_od_all$population[time_point_index])
  index<-index + 1
}

#NOTE(review): pairing Time_Period_ID and Time_Period_Start by sorting each column independently
#assumes the two sort in the same order (i.e., IDs are chronological) -- TODO confirm upstream
num_attr_od_od_all<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_od_all$Time_Period_ID)),
                               "Time_Start" = sort(unique(attr_deaths_anlys_od_all$Time_Period_Start)),
                               "Num_Attr_Deaths" = num_attr_od,
                               "Num_Attr_Deaths_LB" = num_attr_od_LB,
                               "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_od_all$Num_Attr_Deaths) #NOTE(review): stale inline figure (16336.4) removed; rendered output below is 13948.84
## [1] 13948.84
summary(num_attr_od_od_all$Num_Attr_Deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.022 175.954 309.809 348.721 476.751 742.891
#convert the time column back to Date class for the yearly grouping below
num_attr_od_od_all$Time_Start<-as.Date(num_attr_od_od_all$Time_Start)

#compute the 95% CI for the total
sum(num_attr_od_od_all$Num_Attr_Deaths_LB)
## [1] 7631.079
sum(num_attr_od_od_all$Num_Attr_Deaths_UB)
## [1] 20210.63
#sum up the number of excess deaths per year
yearly_num_Attr_Deaths_od_all<-num_attr_od_od_all %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

summary(yearly_num_Attr_Deaths_od_all$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.87  373.89  620.48  697.44  952.41 1428.40
summary(yearly_num_Attr_Deaths_od_all$death_lb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   8.131 204.535 339.450 381.554 521.050 781.458
summary(yearly_num_Attr_Deaths_od_all$death_ub)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.57  541.77  899.02 1010.53 1379.94 2069.59

5 Sensitivity Analysis 1: Confounding by Indication

5.1 Analysis

############################## Sensitivity Analysis 1: Confounding by Indication Analysis ###############################
#create an indicator variable that is equal to 1 at the time point just before
#a state's intervention first occurs (and 0 everywhere else)
#initialize the column to all zeros
main_analysis_data$justBeforeIntervention<-0

#for each state, we first subset the data so it only contains the state's data
for(state in unique(main_analysis_data$State)){
  state_data<-main_analysis_data[main_analysis_data$State == state,]
  #then, we find the first time point where intervention occurred
  #(NA if the state never has Intervention_Redefined > 0)
  index_first_intervention<-which(state_data$Intervention_Redefined>0)[1]
  #impute a 1 for the time point right before when intervention first occurs;
  #guard against states with no intervention (NA index would raise a
  #subscripting error) and states whose intervention starts in the very first
  #period (index 0 would be a silent no-op)
  if(!is.na(index_first_intervention) && index_first_intervention > 1){
    main_analysis_data$justBeforeIntervention[main_analysis_data$State == state][index_first_intervention - 1]<-1
  }
}

#subset the data so that we are only looking at the periods before the intervention occurs for each state
#collect each state's rows in a list and bind once at the end -- avoids the
#quadratic cost of growing a data frame with rbind() inside the loop
per_state_rows<-list()
for(state in unique(main_analysis_data$State)){
  state_data<-main_analysis_data[main_analysis_data$State == state,]
  #we don't include these states because Georgia and Ohio have intervention in 2000
  #Hawaii is in this list because it doesn't have an intervention, so we will encounter an error
  #if the state is Hawaii, we'll go to the else if condition
  if(!(state %in% c("Hawaii", "Georgia", "Ohio"))){
    #look for the index where it is just before the intervention
    index_first_intervention<-which(state_data$justBeforeIntervention>0)
    #add the rows up to and including the just-before-intervention time point;
    #skip (rather than error on) any state without a justBeforeIntervention flag
    if(length(index_first_intervention) > 0){
      per_state_rows[[length(per_state_rows) + 1]]<-state_data[1:index_first_intervention[1],]
    }
  }else if(state == "Hawaii"){
    #Hawaii doesn't have an intervention, so we want to include all the rows of Hawaii
    per_state_rows[[length(per_state_rows) + 1]]<-state_data
  }
}
sensitivity_anlys_conf_by_indication_data<-do.call(rbind, per_state_rows)

#run the analysis for the sensitivity analysis:
#binomial GAM of (deaths, survivors) counts with state fixed effects,
#region-specific cubic regression splines in time, the policy covariates, and
#the just-before-intervention indicator -- the confounding-by-indication probe
#(its coefficient tests whether mortality was already shifting right before
#states adopted the intervention)
sensitivity_anlys_conf_by_indication<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                                            s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                                            Naloxone_Pharmacy_Yes_Redefined +
                                            Naloxone_Pharmacy_No_Redefined +
                                            Medical_Marijuana_Redefined +
                                            Recreational_Marijuana_Redefined +
                                            GSL_Redefined +
                                            PDMP_Redefined +
                                            Medicaid_Expansion_Redefined +
                                            justBeforeIntervention + 
                                            num_states_w_intervention,
                                          data = sensitivity_anlys_conf_by_indication_data, family = "binomial")

#render the regression results as an HTML table for the report
stargazer(sensitivity_anlys_conf_by_indication, type = "html", dep.var.labels = "Unintentional Overdose Deaths")
Dependent variable:
Unintentional Overdose Deaths
StateAlaska 0.577***
(0.045)
StateArizona 0.592***
(0.028)
StateArkansas -0.327***
(0.037)
StateCalifornia 0.079*
(0.047)
StateColorado 0.415***
(0.036)
StateConnecticut 1.040***
(0.066)
StateDelaware 0.298***
(0.039)
StateFlorida 0.736***
(0.048)
StateHawaii -0.041
(0.044)
StateIdaho 0.022
(0.038)
StateIllinois 0.835***
(0.040)
StateIndiana 0.032
(0.034)
StateIowa -0.663***
(0.072)
StateKansas 0.035
(0.043)
StateKentucky 0.764***
(0.029)
StateLouisiana 0.502***
(0.041)
StateMaine 0.455***
(0.069)
StateMaryland -1.264***
(0.099)
StateMassachusetts 0.099
(0.061)
StateMichigan 0.117***
(0.037)
StateMinnesota -0.416***
(0.043)
StateMississippi 0.106***
(0.031)
StateMissouri 0.409***
(0.037)
StateMontana -0.133
(0.092)
StateNebraska -0.820***
(0.048)
StateNevada 0.887***
(0.044)
StateNew Hampshire 0.237***
(0.066)
StateNew Jersey 1.096***
(0.066)
StateNew Mexico 1.392***
(0.046)
StateNew York 0.587***
(0.061)
StateNorth Carolina 0.456***
(0.032)
StateNorth Dakota -1.144***
(0.079)
StateOklahoma 0.657***
(0.030)
StateOregon 0.184***
(0.041)
StatePennsylvania 1.379***
(0.072)
StateRhode Island 0.043
(0.068)
StateSouth Carolina 0.294***
(0.029)
StateSouth Dakota -1.030***
(0.065)
StateTennessee 0.571***
(0.029)
StateTexas 0.523***
(0.043)
StateUtah -0.541***
(0.064)
StateVermont 0.166**
(0.079)
StateVirginia 0.405***
(0.039)
StateWashington 0.563***
(0.039)
StateWest Virginia 0.913***
(0.030)
StateWisconsin 0.203***
(0.045)
StateWyoming 0.110*
(0.065)
Naloxone_Pharmacy_Yes_Redefined -0.028
(0.080)
Naloxone_Pharmacy_No_Redefined -0.326***
(0.040)
Medical_Marijuana_Redefined 0.024
(0.023)
Recreational_Marijuana_Redefined -0.402*
(0.231)
GSL_Redefined 0.004
(0.036)
PDMP_Redefined -0.076***
(0.016)
Medicaid_Expansion_Redefined 0.061
(0.066)
justBeforeIntervention -0.080***
(0.012)
num_states_w_intervention -0.015***
(0.005)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -10.818***
(0.095)
Observations 818
Adjusted R2 0.878
Log Likelihood -4,321.874
UBRE 3.509
Note: *p<0.1; **p<0.05; ***p<0.01

5.2 Compile Results

############################## Sensitivity Analysis 1: Make Data Frame of Results and 95% CI ###############################
#store the coefficients of all the terms into a table (one row per coefficient;
#rownames are the term labels produced by mgcv)
sensitivity_anlys_conf_by_indication_full_table<-data.frame(coef(sensitivity_anlys_conf_by_indication))
head(sensitivity_anlys_conf_by_indication_full_table)
##                 coef.sensitivity_anlys_conf_by_indication.
## (Intercept)                                   -10.81776480
## StateAlaska                                     0.57651492
## StateArizona                                    0.59213868
## StateArkansas                                  -0.32710781
## StateCalifornia                                 0.07852328
## StateColorado                                   0.41534935
#change the auto-generated column name to "Coefficient_Estimate"
colnames(sensitivity_anlys_conf_by_indication_full_table)<-c("Coefficient_Estimate")

#store a vector of the covariate (policy) term names; used below to separate
#covariates from state effects and smoothed time effects
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", 
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined", 
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined", 
              "justBeforeIntervention",
              "num_states_w_intervention")

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#the renaming is done on the whole rowname vector at once (vectorized) instead
#of an index loop over 1:length(rownames(...))
row_labels<-rownames(sensitivity_anlys_conf_by_indication_full_table)

#rename the intercept so that we know that Alabama is the baseline state
row_labels[row_labels == "(Intercept)"]<-"Intercept/Alabama"

#state effects look like "StateMassachusetts" or "StateGeorgia"; strip the
#leading "State" (covariates never start with "State", but keep the guard)
is_state_effect<-!(row_labels %in% covariates) & startsWith(row_labels, "State")
row_labels[is_state_effect]<-substring(row_labels[is_state_effect], 6)

#smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1";
#replace the 35-character "s(Time_Period_ID):as.factor(Region)" prefix with a
#readable label
is_smooth_effect<-startsWith(row_labels, "s(Time_Period_ID):as.factor(Region)")
row_labels[is_smooth_effect]<-paste0("Smoothed Time for Region ", substring(row_labels[is_smooth_effect], 36))

rownames(sensitivity_anlys_conf_by_indication_full_table)<-row_labels

#store the 95% CI for each coefficient (normal approximation: estimate +/- 1.96*SE)
#compute the model summary once and reuse its standard errors below
conf_by_ind_summary<-summary(sensitivity_anlys_conf_by_indication)
conf_by_ind_se<-conf_by_ind_summary$se
sensitivity_anlys_conf_by_indication_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate - 1.96*conf_by_ind_se
sensitivity_anlys_conf_by_indication_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate + 1.96*conf_by_ind_se

#store the odds ratio estimates and 95% CI (exponentiate the log-odds scale)
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio<-exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate)
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Upper_Bound)

#store the standard error and p-values
sensitivity_anlys_conf_by_indication_full_table$Standard_Error<-conf_by_ind_se
#parametric p-values exist only for the non-smooth terms, so the smoothed time
#effect rows are padded with NA
parametric_p<-conf_by_ind_summary$p.pv
num_coefs<-length(coef(sensitivity_anlys_conf_by_indication))
sensitivity_anlys_conf_by_indication_full_table$p_value<-c(parametric_p,
                                                           rep(NA, num_coefs - length(parametric_p)))

#inspect the first and last rows of the assembled results table
head(sensitivity_anlys_conf_by_indication_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama         -10.81776480            -11.00298846
## Alaska                      0.57651492              0.48842192
## Arizona                     0.59213868              0.53755922
## Arkansas                   -0.32710781             -0.39949178
## California                  0.07852328             -0.01414272
## Colorado                    0.41534935              0.34512748
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama             -10.6325411 2.004031e-05  1.665186e-05
## Alaska                          0.6646079 1.779825e+00  1.629742e+00
## Arizona                         0.6467181 1.807851e+00  1.711824e+00
## Arkansas                       -0.2547238 7.210060e-01  6.706608e-01
## California                      0.1711893 1.081689e+00  9.859568e-01
## Colorado                        0.4855712 1.514900e+00  1.412170e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  2.411826e-05     0.09450187  0.000000e+00
## Alaska             1.943728e+00     0.04494541  1.157541e-37
## Arizona            1.909265e+00     0.02784666 2.432977e-100
## Arkansas           7.751305e-01     0.03693060  8.192736e-19
## California         1.186715e+00     0.04727857  9.674075e-02
## Colorado           1.625103e+00     0.03582749  4.469988e-31
tail(sensitivity_anlys_conf_by_indication_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.5480386               0.3953717
## Smoothed Time for Region West.5            0.6895569               0.4702550
## Smoothed Time for Region West.6            0.8675583               0.5912600
## Smoothed Time for Region West.7            1.0897881               0.7670357
## Smoothed Time for Region West.8            1.3401909               0.9777689
## Smoothed Time for Region West.9            1.5683515               1.1666629
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.7007054   1.729857
## Smoothed Time for Region West.5               0.9088587   1.992832
## Smoothed Time for Region West.6               1.1438565   2.381090
## Smoothed Time for Region West.7               1.4125405   2.973644
## Smoothed Time for Region West.8               1.7026129   3.819773
## Smoothed Time for Region West.9               1.9700401   4.798731
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.484936      2.015174     0.07789125
## Smoothed Time for Region West.5      1.600402      2.481489     0.11188871
## Smoothed Time for Region West.6      1.806263      3.138850     0.14096850
## Smoothed Time for Region West.7      2.153374      4.106374     0.16466958
## Smoothed Time for Region West.8      2.658518      5.488269     0.18490917
## Smoothed Time for Region West.9      3.211258      7.170964     0.20494317
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export the sensitivity analysis confounding by indication full table of estimates as CSV
# write.csv(round(sensitivity_anlys_conf_by_indication_full_table,3), "./Data/coefficients_JustBeforeInd_9_6_21_full_data.csv")

#export out a table with just the covariates:
#find the rows in the full table which contain estimates for the covariates and extract them
covariate_Index<-which(rownames(sensitivity_anlys_conf_by_indication_full_table) %in% covariates)
sensitivity_anlys_conf_by_indication_covariate_table<-(round(sensitivity_anlys_conf_by_indication_full_table[covariate_Index,],5))

#rename these variables so they look nicer
rownames(sensitivity_anlys_conf_by_indication_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                                                  "Naloxone_Pharmacy_No",
                                                                  "Medical_Marijuana",
                                                                  "Recreational_Marijuana",
                                                                  "GSL", 
                                                                  "PDMP",
                                                                  "Medicaid_Expansion",
                                                                  "Just Before Indicator", 
                                                                  "Number of States w DIH Prosecution")

#put the covariate rows on top and the rest of the data at the bottom
#NOTE(review): the covariate rows were rounded to 5 decimals above but the
#appended remaining rows are unrounded -- confirm this mix is intentional
sensitivity_anlys_conf_by_indication_covariate_table<-rbind(sensitivity_anlys_conf_by_indication_covariate_table,
                                                            sensitivity_anlys_conf_by_indication_full_table[-covariate_Index,])

#drop the coefficient-scale and SE columns, keeping the odds ratio estimates,
#their CI bounds, and the p-value
sensitivity_anlys_conf_by_indication_covariate_table<-sensitivity_anlys_conf_by_indication_covariate_table[,-which(colnames(sensitivity_anlys_conf_by_indication_covariate_table) %in% c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))]
#NOTE(review): the values are odds ratios from a logit model but the columns
#are labeled "Risk_Ratio" -- presumably relying on OR ~ RR for rare outcomes;
#confirm the intended terminology
colnames(sensitivity_anlys_conf_by_indication_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_conf_by_indication_covariate_table, 10)
##                                    Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                      9.725100e-01 8.321400e-01
## Naloxone_Pharmacy_No                       7.214800e-01 6.674600e-01
## Medical_Marijuana                          1.023930e+00 9.781300e-01
## Recreational_Marijuana                     6.689300e-01 4.249600e-01
## GSL                                        1.003840e+00 9.358900e-01
## PDMP                                       9.268700e-01 8.980600e-01
## Medicaid_Expansion                         1.062920e+00 9.342700e-01
## Just Before Indicator                      9.233800e-01 9.011900e-01
## Number of States w DIH Prosecution         9.850500e-01 9.746300e-01
## Intercept/Alabama                          2.004031e-05 1.665186e-05
##                                     RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes              1.136550e+00 0.72593
## Naloxone_Pharmacy_No               7.798700e-01 0.00000
## Medical_Marijuana                  1.071870e+00 0.31111
## Recreational_Marijuana             1.052960e+00 0.08238
## GSL                                1.076720e+00 0.91465
## PDMP                               9.566100e-01 0.00000
## Medicaid_Expansion                 1.209290e+00 0.35391
## Just Before Indicator              9.461100e-01 0.00000
## Number of States w DIH Prosecution 9.955900e-01 0.00555
## Intercept/Alabama                  2.411826e-05 0.00000
#export the covariate table into a CSV file
# write.csv(round(sensitivity_anlys_conf_by_indication_covariate_table,3), "./Data/covariates_just_before_intervention_9_6_21_full_data.csv")

6 Sensitivity Analysis 2: Exclude States with At Least 75% Missing Monthly Data

6.1 Analysis

############################## Exclude States from Analysis Based on Missing Data ###############################
#read the interpolated monthly overdose counts and subset the data to be between 2000 and 2019
overdose_monthly_imputed<-read.csv("./Data/od_data_interpolated_unintentional_1999_2019_age_17_up_9_6_21.csv")
od_2000_to_2019<-overdose_monthly_imputed[overdose_monthly_imputed$Year>=2000 & overdose_monthly_imputed$Year<=2019,]

#convert the date to Date objects and the number of deaths to numeric
#(Month.Code is parsed as "YYYY/MM" -- TODO confirm against the CSV)
od_2000_to_2019$Date<-as.Date(as.yearmon(od_2000_to_2019$Month.Code, format = "%Y/%m"))
od_2000_to_2019$Deaths<-as.numeric(od_2000_to_2019$Deaths)

#plot to look at the trend of the outcome and see how much data is missing
ggplot(od_2000_to_2019, aes(x = Date, y = Deaths)) + geom_line() + facet_wrap(vars(State))

#find the states where the proportion of monthly missing data for years 2000 to 2019
#is strictly greater than 75% (NOTE(review): the section title says "at least
#75%" but the criterion below is > 0.75, not >= -- confirm which is intended)
statesToExcludeCriteria<-sapply(unique(od_2000_to_2019$State), function(state){
  mean(is.na(od_2000_to_2019$Deaths[od_2000_to_2019$State == state]))>.75})

statesToExclude<-unique(od_2000_to_2019$State)[statesToExcludeCriteria]
#states excluded: [1] "Alaska"       "Montana"      "Nebraska"     "North Dakota" "South Dakota" "Vermont" "Wyoming"

#calculate the median (and IQR) of missingness from resulting data
summary_missingness <- od_2000_to_2019 %>% 
  group_by(State) %>% 
  summarise(missing = mean(is.na(Deaths)))

#missingness statistics among the retained (non-excluded) states
median(summary_missingness$missing[!summary_missingness$State %in% statesToExclude])
## [1] 0.02083333
IQR(summary_missingness$missing[!summary_missingness$State %in% statesToExclude])
## [1] 0.1979167
summary(summary_missingness$missing[!summary_missingness$State %in% statesToExclude])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.02083 0.12791 0.19792 0.64583
#include only the states that are not excluded
sensitivity_anlys_exclude_states_data<-main_analysis_data[!(main_analysis_data$State %in% statesToExclude), ]


#refit the main-analysis model on the reduced data: binomial GAM of
#(deaths, survivors) counts with state fixed effects, region-specific cubic
#regression splines in time, the policy covariates, and the intervention
#indicator
sensitivity_anlys_exclude_states_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                                              s(Time_Period_ID, bs = "cr", by = as.factor(Region))  +
                                              Naloxone_Pharmacy_Yes_Redefined +
                                              Naloxone_Pharmacy_No_Redefined +
                                              Medical_Marijuana_Redefined +
                                              Recreational_Marijuana_Redefined +
                                              GSL_Redefined +
                                              PDMP_Redefined +
                                              Medicaid_Expansion_Redefined +
                                              Intervention_Redefined + 
                                              num_states_w_intervention,
                                            data = sensitivity_anlys_exclude_states_data, family = "binomial")
#render the regression results as an HTML table for the report
stargazer(sensitivity_anlys_exclude_states_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateArizona 0.307***
(0.014)
StateArkansas -0.393***
(0.020)
StateCalifornia -0.172***
(0.013)
StateColorado 0.096***
(0.016)
StateConnecticut 0.187***
(0.016)
StateDelaware 0.442***
(0.022)
StateFlorida 0.268***
(0.012)
StateGeorgia -0.087***
(0.013)
StateHawaii -0.213***
(0.026)
StateIdaho -0.159***
(0.024)
StateIllinois -0.023*
(0.013)
StateIndiana 0.078***
(0.014)
StateIowa -0.746***
(0.021)
StateKansas -0.343***
(0.019)
StateKentucky 0.641***
(0.014)
StateLouisiana 0.283***
(0.014)
StateMaine 0.167***
(0.022)
StateMaryland -1.069***
(0.019)
StateMassachusetts 0.220***
(0.014)
StateMichigan -0.021
(0.014)
StateMinnesota -0.625***
(0.017)
StateMississippi -0.109***
(0.018)
StateMissouri 0.192***
(0.015)
StateNevada 0.433***
(0.017)
StateNew Hampshire 0.273***
(0.020)
StateNew Jersey 0.105***
(0.013)
StateNew Mexico 0.617***
(0.017)
StateNew York -0.241***
(0.013)
StateNorth Carolina 0.175***
(0.013)
StateOhio 0.451***
(0.012)
StateOklahoma 0.377***
(0.015)
StateOregon -0.184***
(0.018)
StatePennsylvania 0.442***
(0.012)
StateRhode Island 0.255***
(0.022)
StateSouth Carolina 0.228***
(0.015)
StateTennessee 0.437***
(0.013)
StateTexas -0.233***
(0.012)
StateUtah 0.010
(0.018)
StateVirginia -0.113***
(0.014)
StateWashington 0.072***
(0.015)
StateWest Virginia 0.897***
(0.015)
StateWisconsin -0.039***
(0.015)
Naloxone_Pharmacy_Yes_Redefined -0.022***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.015**
(0.007)
Medical_Marijuana_Redefined 0.062***
(0.006)
Recreational_Marijuana_Redefined -0.040***
(0.009)
GSL_Redefined 0.035***
(0.006)
PDMP_Redefined -0.017***
(0.006)
Medicaid_Expansion_Redefined 0.100***
(0.006)
Intervention_Redefined 0.073***
(0.006)
num_states_w_intervention 0.015***
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -11.075***
(0.071)
Observations 1,720
Adjusted R2 0.914
Log Likelihood -15,794.380
UBRE 10.135
Note: *p<0.1; **p<0.05; ***p<0.01

6.2 Compile Results

############################## Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table (one row per coefficient; rownames are
#the term labels produced by mgcv)
sensitivity_anlys_exclude_states_full_table<-data.frame(coef(sensitivity_anlys_exclude_states_model))
#check to see how the table looks
head(sensitivity_anlys_exclude_states_full_table)
##                  coef.sensitivity_anlys_exclude_states_model.
## (Intercept)                                      -11.07477025
## StateArizona                                       0.30682384
## StateArkansas                                     -0.39348967
## StateCalifornia                                   -0.17225255
## StateColorado                                      0.09558043
## StateConnecticut                                   0.18720667
#rename the auto-generated column to "Coefficient_Estimate"
colnames(sensitivity_anlys_exclude_states_full_table)<-c("Coefficient_Estimate")

#vector of covariate (policy) term names; note this analysis uses
#Intervention_Redefined rather than the justBeforeIntervention indicator
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", 
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined", 
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined", 
              "Intervention_Redefined",
              "num_states_w_intervention")

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#the renaming is done on the whole rowname vector at once (vectorized) instead
#of an index loop over 1:length(rownames(...))
exclude_states_labels<-rownames(sensitivity_anlys_exclude_states_full_table)

#rename the intercept so that we know that Alabama is the baseline state
exclude_states_labels[exclude_states_labels == "(Intercept)"]<-"Intercept/Alabama"

#state effects look like "StateMassachusetts" or "StateGeorgia"; strip the
#leading "State" (covariates never start with "State", but keep the guard)
is_state_row<-!(exclude_states_labels %in% covariates) & startsWith(exclude_states_labels, "State")
exclude_states_labels[is_state_row]<-substring(exclude_states_labels[is_state_row], 6)

#smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1";
#replace the 35-character "s(Time_Period_ID):as.factor(Region)" prefix with a
#readable label
is_smooth_row<-startsWith(exclude_states_labels, "s(Time_Period_ID):as.factor(Region)")
exclude_states_labels[is_smooth_row]<-paste0("Smoothed Time for Region ", substring(exclude_states_labels[is_smooth_row], 36))

rownames(sensitivity_anlys_exclude_states_full_table)<-exclude_states_labels

#confidence intervals for the coefficients (normal approximation:
#estimate +/- 1.96*SE); compute the model summary once and reuse it below
exclude_states_summary<-summary(sensitivity_anlys_exclude_states_model)
exclude_states_se<-exclude_states_summary$se
sensitivity_anlys_exclude_states_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate - 1.96*exclude_states_se
sensitivity_anlys_exclude_states_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate + 1.96*exclude_states_se

#impute the estimates and confidence intervals in the odds ratio scale
sensitivity_anlys_exclude_states_full_table$Odds_Ratio<-exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate)
sensitivity_anlys_exclude_states_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_exclude_states_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
sensitivity_anlys_exclude_states_full_table$Standard_Error<-exclude_states_se
#parametric p-values exist only for the non-smooth terms, so the smoothed time
#effect rows are padded with NA
exclude_p_values<-exclude_states_summary$p.pv
num_exclude_coefs<-length(coef(sensitivity_anlys_exclude_states_model))
sensitivity_anlys_exclude_states_full_table$p_value<-c(exclude_p_values,
                                                       rep(NA, num_exclude_coefs - length(exclude_p_values)))

head(sensitivity_anlys_exclude_states_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama         -11.07477025            -11.21470354
## Arizona                     0.30682384              0.27946297
## Arkansas                   -0.39348967             -0.43202733
## California                 -0.17225255             -0.19830774
## Colorado                    0.09558043              0.06407939
## Connecticut                 0.18720667              0.15666837
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama             -10.9348370 1.549845e-05  1.347461e-05
## Arizona                         0.3341847 1.359102e+00  1.322419e+00
## Arkansas                       -0.3549520 6.746983e-01  6.491916e-01
## California                     -0.1461974 8.417666e-01  8.201174e-01
## Colorado                        0.1270815 1.100297e+00  1.066177e+00
## Connecticut                     0.2177450 1.205876e+00  1.169608e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  1.782628e-05     0.07139454  0.000000e+00
## Arizona            1.396801e+00     0.01395963 4.536909e-107
## Arkansas           7.012071e-01     0.01966207  4.275400e-89
## California         8.639872e-01     0.01329346  2.125547e-38
## Colorado           1.135510e+00     0.01607196  2.730531e-09
## Connecticut        1.243270e+00     0.01558077  2.955082e-33
tail(sensitivity_anlys_exclude_states_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4           0.15718859              0.12096209
## Smoothed Time for Region West.5           0.03274450             -0.03534830
## Smoothed Time for Region West.6          -0.05818740             -0.15426198
## Smoothed Time for Region West.7          -0.04810688             -0.16155605
## Smoothed Time for Region West.8           0.00880256             -0.12992106
## Smoothed Time for Region West.9           0.18347551              0.06652154
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4              0.19341510  1.1702163
## Smoothed Time for Region West.5              0.10083730  1.0332865
## Smoothed Time for Region West.6              0.03788719  0.9434731
## Smoothed Time for Region West.7              0.06534228  0.9530319
## Smoothed Time for Region West.8              0.14752618  1.0088414
## Smoothed Time for Region West.9              0.30042949  1.2013855
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4     1.1285821      1.213386     0.01848291
## Smoothed Time for Region West.5     0.9652692      1.106097     0.03474122
## Smoothed Time for Region West.6     0.8570475      1.038614     0.04901765
## Smoothed Time for Region West.7     0.8508188      1.067524     0.05788223
## Smoothed Time for Region West.8     0.8781647      1.158964     0.07077736
## Smoothed Time for Region West.9     1.0687840      1.350439     0.05967039
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(sensitivity_anlys_exclude_states_full_table) %in% covariates)
sensitivity_anlys_exclude_states_covariate_table<-(round(sensitivity_anlys_exclude_states_full_table[covariate_Index,], 5))

#rename the variables so that it looks cleaner
#NOTE(review): this positional rename assumes the covariate rows appear in the table
#in exactly the order listed below (which() returns indices in table-row order, not in
#the order of the covariates vector) — confirm the ordering before trusting the labels
rownames(sensitivity_anlys_exclude_states_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                                              "Naloxone_Pharmacy_No",
                                                              "Medical_Marijuana",
                                                              "Recreational_Marijuana",
                                                              "GSL", 
                                                              "PDMP", 
                                                              "Medicaid_Expansion",
                                                              "Intervention_Redefined",
                                                              "Number of States w DIH Prosecution")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
sensitivity_anlys_exclude_states_covariate_table<-rbind(sensitivity_anlys_exclude_states_covariate_table, sensitivity_anlys_exclude_states_full_table[-covariate_Index,])
#remove the columns that aren't in odds ratio scale
sensitivity_anlys_exclude_states_covariate_table<-sensitivity_anlys_exclude_states_covariate_table[,-which(colnames(sensitivity_anlys_exclude_states_covariate_table) %in% c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))]

#NOTE(review): the retained columns are odds ratios but are labeled as risk ratios —
#presumably because the outcome is rare (OR approximates RR); confirm this is intended
colnames(sensitivity_anlys_exclude_states_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_exclude_states_covariate_table, 10)
head(sensitivity_anlys_exclude_states_covariate_table, 10)
##                                    Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                      9.777600e-01 9.628600e-01
## Naloxone_Pharmacy_No                       1.015010e+00 1.002020e+00
## Medical_Marijuana                          1.063590e+00 1.051790e+00
## Recreational_Marijuana                     9.605900e-01 9.442300e-01
## GSL                                        1.035890e+00 1.024430e+00
## PDMP                                       9.827600e-01 9.714800e-01
## Medicaid_Expansion                         1.105170e+00 1.091970e+00
## Intervention_Redefined                     1.075390e+00 1.063840e+00
## Number of States w DIH Prosecution         1.014670e+00 1.009850e+00
## Intercept/Alabama                          1.549845e-05 1.347461e-05
##                                     RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes              9.928900e-01 0.00409
## Naloxone_Pharmacy_No               1.028170e+00 0.02341
## Medical_Marijuana                  1.075530e+00 0.00000
## Recreational_Marijuana             9.772400e-01 0.00000
## GSL                                1.047480e+00 0.00000
## PDMP                               9.941700e-01 0.00315
## Medicaid_Expansion                 1.118520e+00 0.00000
## Intervention_Redefined             1.087070e+00 0.00000
## Number of States w DIH Prosecution 1.019510e+00 0.00000
## Intercept/Alabama                  1.782628e-05 0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_exclude_states_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_exclude_states.csv")

6.3 Attributable Deaths

###################################### Attributable Deaths #############################
#restrict the data to the state-periods in which the (redefined) intervention was in effect
attr_deaths_anlys_exclude_states<-sensitivity_anlys_exclude_states_data[which(sensitivity_anlys_exclude_states_data$Intervention_Redefined>0),]

#observed log-odds of overdose death for each state-period; shared by all three
#counterfactual-probability calculations below
observed_logit<-logit(attr_deaths_anlys_exclude_states$imputed_deaths/attr_deaths_anlys_exclude_states$population)

#counterfactual probability of overdose had the intervention not occurred:
#remove the intervention effect on the logit scale, then back-transform with expit
prob_od_no_int_exclude_states<-expit(-coef(sensitivity_anlys_exclude_states_model)["Intervention_Redefined"]*attr_deaths_anlys_exclude_states$Intervention_Redefined
                                     + observed_logit)

#95% CI bounds for every coefficient, via the normal approximation (estimate +/- 1.96*SE)
coef_lb<-coef(sensitivity_anlys_exclude_states_model) - 1.96*summary(sensitivity_anlys_exclude_states_model)$se
coef_ub<-coef(sensitivity_anlys_exclude_states_model) + 1.96*summary(sensitivity_anlys_exclude_states_model)$se

#bounds of the counterfactual probability, obtained by plugging in the lower and
#upper bounds of the intervention coefficient
prob_od_no_int_LB_exclude_states<-expit(-coef_lb[["Intervention_Redefined"]]*attr_deaths_anlys_exclude_states$Intervention_Redefined
                                        + observed_logit)

prob_od_no_int_UB_exclude_states<-expit(-coef_ub[["Intervention_Redefined"]]*attr_deaths_anlys_exclude_states$Intervention_Redefined
                                        + observed_logit)

#estimate the number of deaths attributable to the intervention
#preallocate one slot per time period present in the RESTRICTED data
#(previously these vectors were sized by the FULL data's unique time periods; if the
# restriction to intervention periods drops any period, the trailing NAs make the
# data.frame() call below fail with a row-length mismatch)
time_periods_exclude_states<-sort(unique(attr_deaths_anlys_exclude_states$Time_Period_ID))
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(time_periods_exclude_states))


#for each time period: attributable deaths = observed deaths under the intervention
#minus the estimated deaths had the intervention not occurred

for(index in seq_along(time_periods_exclude_states)){
  #rows of the restricted data belonging to this time period
  time_point_index<-which(attr_deaths_anlys_exclude_states$Time_Period_ID == time_periods_exclude_states[index])

  observed_deaths<-attr_deaths_anlys_exclude_states$imputed_deaths[time_point_index]
  at_risk<-attr_deaths_anlys_exclude_states$population[time_point_index]

  #point estimate of the number of attributable deaths in this period
  num_attr_od[index]<-sum(observed_deaths - prob_od_no_int_exclude_states[time_point_index]*at_risk)

  #lower and upper bounds, from the CI of the counterfactual probability
  num_attr_od_LB[index]<-sum(observed_deaths - prob_od_no_int_LB_exclude_states[time_point_index]*at_risk)
  num_attr_od_UB[index]<-sum(observed_deaths - prob_od_no_int_UB_exclude_states[time_point_index]*at_risk)
}

#assemble the per-period results
#NOTE(review): Time_Start relies on each Time_Period_ID mapping to exactly one
#Time_Period_Start, so that the two sorted unique vectors align — confirm
num_attr_od_exclude_states<-data.frame("Time_Period_ID" = time_periods_exclude_states,
                                       "Time_Start" = sort(unique(attr_deaths_anlys_exclude_states$Time_Period_Start)),
                                       "Num_Attr_Deaths" = num_attr_od,
                                       "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                       "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_exclude_states$Num_Attr_Deaths)
## [1] 38805.66
#sum up the number of excess deaths per year
#derive the calendar year of each period, then total the attributable deaths
#(and their bounds) within each year
yearly_num_Attr_Deaths_exclude_states <- num_attr_od_exclude_states %>%
  mutate(year = year(Time_Start)) %>%
  group_by(year) %>%
  summarise(deaths = sum(Num_Attr_Deaths),
            death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

summary(yearly_num_Attr_Deaths_exclude_states$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    34.9   920.8  1638.2  1940.3  2672.5  4263.1

7 Sensitivity Analysis 3: Divide Unintentional Deaths Equally Amongst Missing Months

7.1 Analysis

### Sensitivity Analysis 3: Divide Unaccountable Deaths Equally Among Missing Months #####
#monthly unintentional overdose deaths, ages 17+, 1999-2019 (tab-delimited export;
#column layout — Notes/State.Code/Crude.Rate — suggests a CDC WONDER extract, confirm)
od_data <- read.csv("./Data/unintentional_od_1999_2019_age_17_up.txt", sep = "\t", stringsAsFactors = FALSE)
#Deaths arrives as character; non-numeric entries (e.g. suppressed cells) become NA here
od_data$Deaths <- as.numeric(od_data$Deaths)
od_data<-od_data[!is.na(od_data$Year),] #delete the rows that only contain data-set description info
tail(od_data)
##       Notes   State State.Code Year Year.Code      Month Month.Code Deaths
## 12595       Wyoming         56 2019      2019 Jul., 2019    2019/07     NA
## 12596       Wyoming         56 2019      2019 Aug., 2019    2019/08     NA
## 12597       Wyoming         56 2019      2019 Sep., 2019    2019/09     NA
## 12598       Wyoming         56 2019      2019 Oct., 2019    2019/10     NA
## 12599       Wyoming         56 2019      2019 Nov., 2019    2019/11     NA
## 12600       Wyoming         56 2019      2019 Dec., 2019    2019/12     NA
##           Population     Crude.Rate
## 12595 Not Applicable Not Applicable
## 12596 Not Applicable Not Applicable
## 12597 Not Applicable Not Applicable
## 12598 Not Applicable Not Applicable
## 12599 Not Applicable Not Applicable
## 12600 Not Applicable Not Applicable
sum(is.na(od_data$Deaths))
## [1] 3148
#set up the overdose data to impute the missing data
#set up the dates
#NOTE(review): Month strings look like "Jul., 2019" (month + year, no day) — confirm
#that mdy() parses these as intended rather than returning NA
od_data$Date<-mdy(od_data$Month)
length(unique(od_data$State))
## [1] 50
#DC is being counted in this, but we do not have data on prosecutions. Remove these rows
od_data<-od_data[od_data$State!="District of Columbia",]

#interpolate the missing values
#yearly totals per state — used below to apportion deaths that the monthly file
#leaves unaccounted for
od_year <- read.csv("./Data/unintentional_od_yearly_1999_2019_age_17_up.txt", sep = "\t", stringsAsFactors = FALSE)
#coerce Deaths to numeric; non-numeric entries become NA
od_year$Deaths <- as.numeric(od_year$Deaths)
head(od_year, 50)
##    Notes   State State.Code Year Year.Code Deaths Population Crude.Rate
## 1        Alabama          1 1999      1999    115    3308855        3.5
## 2        Alabama          1 2000      2000    126    3323678        3.8
## 3        Alabama          1 2001      2001    163    3347225        4.9
## 4        Alabama          1 2002      2002    167    3363499        5.0
## 5        Alabama          1 2003      2003    150    3390408        4.4
## 6        Alabama          1 2004      2004    220    3417067        6.4
## 7        Alabama          1 2005      2005    215    3452576        6.2
## 8        Alabama          1 2006      2006    308    3502183        8.8
## 9        Alabama          1 2007      2007    416    3540544       11.7
## 10       Alabama          1 2008      2008    482    3583279       13.5
## 11       Alabama          1 2009      2009    535    3623746       14.8
## 12       Alabama          1 2010      2010    462    3647277       12.7
## 13       Alabama          1 2011      2011    484    3675597       13.2
## 14       Alabama          1 2012      2012    473    3697617       12.8
## 15       Alabama          1 2013      2013    520    3722241       14.0
## 16       Alabama          1 2014      2014    637    3741806       17.0
## 17       Alabama          1 2015      2015    653    3755483       17.4
## 18       Alabama          1 2016      2016    680    3766477       18.1
## 19       Alabama          1 2017      2017    750    3779274       19.8
## 20       Alabama          1 2018      2018    683    3798031       18.0
## 21       Alabama          1 2019      2019    695    3814879       18.2
## 22 Total Alabama          1   NA        NA   8934   75251742       11.9
## 23        Alaska          2 1999      1999     27     433357        6.2
## 24        Alaska          2 2000      2000     35     436215        8.0
## 25        Alaska          2 2001      2001     52     444943       11.7
## 26        Alaska          2 2002      2002     69     453855       15.2
## 27        Alaska          2 2003      2003     69     461571       14.9
## 28        Alaska          2 2004      2004     62     472951       13.1
## 29        Alaska          2 2005      2005     61     481642       12.7
## 30        Alaska          2 2006      2006     66     489722       13.5
## 31        Alaska          2 2007      2007     63     495956       12.7
## 32        Alaska          2 2008      2008     88     504331       17.4
## 33        Alaska          2 2009      2009     99     512544       19.3
## 34        Alaska          2 2010      2010     74     522853       14.2
## 35        Alaska          2 2011      2011     94     534277       17.6
## 36        Alaska          2 2012      2012    112     544349       20.6
## 37        Alaska          2 2013      2013     92     547000       16.8
## 38        Alaska          2 2014      2014    103     550189       18.7
## 39        Alaska          2 2015      2015    105     552166       19.0
## 40        Alaska          2 2016      2016    102     554567       18.4
## 41        Alaska          2 2017      2017    123     554867       22.2
## 42        Alaska          2 2018      2018     94     553622       17.0
## 43        Alaska          2 2019      2019    112     551562       20.3
## 44 Total  Alaska          2   NA        NA   1702   10652539       16.0
## 45       Arizona          4 1999      1999    360    3691428        9.8
## 46       Arizona          4 2000      2000    397    3763685       10.5
## 47       Arizona          4 2001      2001    418    3874462       10.8
## 48       Arizona          4 2002      2002    457    3968317       11.5
## 49       Arizona          4 2003      2003    507    4056693       12.5
## 50       Arizona          4 2004      2004    537    4167950       12.9
#see how many states have missing yearly entries and use the totals to impute the missing yearly values.
#per-state count of NA yearly Deaths values; the table() shows which states have any
sum_na <- od_year %>% group_by(State) %>% summarise(sum(is.na(Deaths)))
table(sum_na)
##                 sum(is.na(Deaths))
## State            0 4 5 72
##                  0 0 0  1
##   Alabama        1 0 0  0
##   Alaska         1 0 0  0
##   Arizona        1 0 0  0
##   Arkansas       1 0 0  0
##   California     1 0 0  0
##   Colorado       1 0 0  0
##   Connecticut    1 0 0  0
##   Delaware       1 0 0  0
##   Florida        1 0 0  0
##   Georgia        1 0 0  0
##   Hawaii         1 0 0  0
##   Idaho          1 0 0  0
##   Illinois       1 0 0  0
##   Indiana        1 0 0  0
##   Iowa           1 0 0  0
##   Kansas         1 0 0  0
##   Kentucky       1 0 0  0
##   Louisiana      1 0 0  0
##   Maine          1 0 0  0
##   Maryland       1 0 0  0
##   Massachusetts  1 0 0  0
##   Michigan       1 0 0  0
##   Minnesota      1 0 0  0
##   Mississippi    1 0 0  0
##   Missouri       1 0 0  0
##   Montana        1 0 0  0
##   Nebraska       1 0 0  0
##   Nevada         1 0 0  0
##   New Hampshire  1 0 0  0
##   New Jersey     1 0 0  0
##   New Mexico     1 0 0  0
##   New York       1 0 0  0
##   North Carolina 1 0 0  0
##   North Dakota   0 1 0  0
##   Ohio           1 0 0  0
##   Oklahoma       1 0 0  0
##   Oregon         1 0 0  0
##   Pennsylvania   1 0 0  0
##   Rhode Island   0 1 0  0
##   South Carolina 1 0 0  0
##   South Dakota   0 0 1  0
##   Tennessee      1 0 0  0
##   Texas          1 0 0  0
##   Utah           1 0 0  0
##   Vermont        1 0 0  0
##   Virginia       1 0 0  0
##   Washington     1 0 0  0
##   West Virginia  1 0 0  0
##   Wisconsin      1 0 0  0
##   Wyoming        1 0 0  0
#the 3 states are: North Dakota, South Dakota, and Rhode Island
#for each of them, take the state's "Total" row, subtract the yearly deaths that ARE
#recorded, and spread the remainder evenly across the missing years
for(st in c("North Dakota", "South Dakota", "Rhode Island")){
  state_rows<-od_year$State == st
  missing_rows<-state_rows & is.na(od_year$Deaths)

  #deaths in the overall total that the observed yearly rows do not account for
  unaccounted<-od_year$Deaths[state_rows & od_year$Notes == "Total"] -
    sum(od_year$Deaths[state_rows & od_year$Notes != "Total"], na.rm = TRUE)

  #divide the unaccounted deaths equally among the missing years
  od_year$Deaths[missing_rows]<-unaccounted/sum(missing_rows)
}
#drop the "Total" rows (they have no Year) now that the imputation is done
od_year <- od_year[!is.na(od_year$Year),]
tail(od_year)
##      Notes   State State.Code Year Year.Code Deaths Population Crude.Rate
## 1094       Wyoming         56 2014      2014     87     445830       19.5
## 1095       Wyoming         56 2015      2015     79     447212       17.7
## 1096       Wyoming         56 2016      2016     79     446600       17.7
## 1097       Wyoming         56 2017      2017     53     442832       12.0
## 1098       Wyoming         56 2018      2018     54     442962       12.2
## 1099       Wyoming         56 2019      2019     68     445025       15.3
#placeholder column for the month-level imputed death counts
od_data$imputed_vals<-rep(NA, nrow(od_data))

startYear<-1999

#For every state, spread each year's unaccounted deaths (yearly total minus the sum of
#the observed monthly counts) evenly over that year's missing months.
#NOTE(review): the indexing below assumes od_data holds exactly 12 rows per state-year,
#ordered chronologically within each state — confirm against the raw file.
for(state in unique(od_data$State)){
  #get the values of the deaths for state
  currentDeaths<-od_data$Deaths[od_data$State == state]
  for(year in startYear:2019){
    #positions of this year's 12 months within the state's vector
    monthIndex<-(1:12) + 12*(year - startYear)
    #which of those months have missing counts
    indexMissing <- which(is.na(currentDeaths[monthIndex]))
    if(length(indexMissing) > 0){
      #deaths already accounted for by the observed months of this year
      currentDeathsTotal <- sum(currentDeaths[monthIndex], na.rm = TRUE)
      #deaths the yearly total says are missing from the monthly data
      numNotAccounted <- od_year$Deaths[od_year$State == state & od_year$Year == year] - currentDeathsTotal

      #divide the unaccounted deaths evenly among the missing months
      #(the original's empty "else next" branch was a no-op and has been removed)
      currentDeaths[monthIndex][indexMissing] <- numNotAccounted/length(indexMissing)
    }
  }
  #store the imputed series back into the data set
  od_data$imputed_vals[od_data$State == state]<-currentDeaths
}

#group into 6 month time periods now and compute the total number of deaths in each period
od_data_grouped_data <- od_data %>%
  mutate(Time_Period_Start = lubridate::floor_date(Date , "6 months" )) %>%
  group_by(State, Time_Period_Start) %>%
  summarise(sum_deaths = sum(imputed_vals, na.rm = TRUE))

#restrict the dataset to the 2000-2019 time periods
#(strictly after 1999 and strictly before 2020)
od_data_grouped_data <- od_data_grouped_data[year(od_data_grouped_data$Time_Period_Start) > 1999 &
                     year(od_data_grouped_data$Time_Period_Start) < 2020,]

#create a new dataset for the analysis using columns from the main analysis data
#NOTE(review): this assumes od_data_grouped_data rows align one-to-one, in the same
#State/Time_Period_Start order, with main_analysis_data — confirm before relying on it
sensitivity_anlys_imputed_od_wo_interp_data <- main_analysis_data %>%
  ungroup() %>%
  mutate(imputed_deaths_no_interp = od_data_grouped_data$sum_deaths,
         num_alive_no_interp = population - imputed_deaths_no_interp) %>%
  dplyr::select(-c(imputed_deaths, num_alive)) #want to remove the outcome from main analysis to not get confused

#compute, per 6-month period, the number of states with the intervention
#NOTE(review): this creates num_states_with_intervention, but the model below uses
#num_states_w_intervention (presumably a column already present in
#main_analysis_data) — confirm which variable is intended
sensitivity_anlys_imputed_od_wo_interp_data <- sensitivity_anlys_imputed_od_wo_interp_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_with_intervention = sum(Intervention_Redefined))

#run the model for analysis: binomial GAM of (deaths, survivors) per state-period, with
#state fixed effects, region-specific cubic-regression smooths over time, and the
#policy covariates
#NOTE(review): the formula uses num_states_w_intervention, NOT the
#num_states_with_intervention column computed just above; the model ran, so
#num_states_w_intervention presumably already exists in the data — confirm
sensitivity_anlys_imputed_od_wo_interp <- gam(cbind(round(imputed_deaths_no_interp),
                                                    round(num_alive_no_interp))~ State +
                                                s(Time_Period_ID, bs = "cr",
                                                  by = as.factor(Region)) +
                                                Naloxone_Pharmacy_Yes_Redefined +
                                                Naloxone_Pharmacy_No_Redefined +
                                                Medical_Marijuana_Redefined +
                                                Recreational_Marijuana_Redefined +
                                                GSL_Redefined +
                                                PDMP_Redefined +
                                                Medicaid_Expansion_Redefined +
                                                Intervention_Redefined + 
                                                num_states_w_intervention,
                                              data = sensitivity_anlys_imputed_od_wo_interp_data,
                                              family = "binomial")
stargazer(sensitivity_anlys_imputed_od_wo_interp, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateAlaska 0.251***
(0.028)
StateArizona 0.305***
(0.014)
StateArkansas -0.395***
(0.020)
StateCalifornia -0.170***
(0.013)
StateColorado 0.093***
(0.016)
StateConnecticut 0.192***
(0.016)
StateDelaware 0.438***
(0.022)
StateFlorida 0.268***
(0.012)
StateGeorgia -0.087***
(0.013)
StateHawaii -0.217***
(0.026)
StateIdaho -0.159***
(0.024)
StateIllinois -0.022*
(0.013)
StateIndiana 0.079***
(0.014)
StateIowa -0.745***
(0.021)
StateKansas -0.343***
(0.019)
StateKentucky 0.641***
(0.014)
StateLouisiana 0.283***
(0.014)
StateMaine 0.167***
(0.022)
StateMaryland -1.069***
(0.019)
StateMassachusetts 0.221***
(0.014)
StateMichigan -0.020
(0.014)
StateMinnesota -0.625***
(0.017)
StateMississippi -0.110***
(0.018)
StateMissouri 0.190***
(0.015)
StateMontana -0.357***
(0.029)
StateNebraska -0.896***
(0.029)
StateNevada 0.432***
(0.017)
StateNew Hampshire 0.272***
(0.020)
StateNew Jersey 0.106***
(0.013)
StateNew Mexico 0.620***
(0.017)
StateNew York -0.237***
(0.013)
StateNorth Carolina 0.175***
(0.013)
StateNorth Dakota -1.054***
(0.045)
StateOhio 0.453***
(0.012)
StateOklahoma 0.377***
(0.015)
StateOregon -0.187***
(0.018)
StatePennsylvania 0.444***
(0.012)
StateRhode Island 0.255***
(0.022)
StateSouth Carolina 0.227***
(0.015)
StateSouth Dakota -0.952***
(0.042)
StateTennessee 0.437***
(0.013)
StateTexas -0.231***
(0.012)
StateUtah 0.012
(0.018)
StateVermont -0.147***
(0.031)
StateVirginia -0.111***
(0.014)
StateWashington 0.071***
(0.015)
StateWest Virginia 0.896***
(0.015)
StateWisconsin -0.038***
(0.015)
StateWyoming 0.020
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.025***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.007
(0.007)
Medical_Marijuana_Redefined 0.063***
(0.006)
Recreational_Marijuana_Redefined -0.037***
(0.009)
GSL_Redefined 0.036***
(0.006)
PDMP_Redefined -0.021***
(0.006)
Medicaid_Expansion_Redefined 0.100***
(0.006)
Intervention_Redefined 0.069***
(0.005)
num_states_w_intervention 0.014***
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -11.060***
(0.071)
Observations 2,000
Adjusted R2 0.914
Log Likelihood -16,706.440
UBRE 8.795
Note: p<0.1; p<0.05; p<0.01
# exp(coef(sensitivity_anlys_imputed_od_wo_interp)["Intervention_Redefined"]) 

7.2 Compile Results

########## Sensitivity Analysis 3: Make Data Frame of Results and 95% CI #############
#store the coefficients into a one-column table (rownames = coefficient names)
sensitivity_anlys_wo_interp_full_table<-data.frame(coef(sensitivity_anlys_imputed_od_wo_interp))
#check to see how the table looks
head(sensitivity_anlys_wo_interp_full_table)
##                 coef.sensitivity_anlys_imputed_od_wo_interp.
## (Intercept)                                     -11.06009482
## StateAlaska                                       0.25068631
## StateArizona                                      0.30480530
## StateArkansas                                    -0.39548337
## StateCalifornia                                  -0.17028202
## StateColorado                                     0.09282977
#rename the auto-generated column to "Coefficient_Estimate"
colnames(sensitivity_anlys_wo_interp_full_table)<-c("Coefficient_Estimate")

#vector of covariate (policy) coefficient names as they appear in the model output;
#all carry the "_Redefined" suffix except the count of states with the intervention
covariates<-c(paste0(c("Naloxone_Pharmacy_Yes",
                       "Naloxone_Pharmacy_No",
                       "Medical_Marijuana",
                       "Recreational_Marijuana",
                       "GSL",
                       "PDMP",
                       "Medicaid_Expansion",
                       "Intervention"),
                     "_Redefined"),
              "num_states_w_intervention")

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#for each row in the main analysis table
#rename the regression coefficient labels so they read cleanly in the output
#table; there are 3 kinds of coefficients: state fixed effects, the policy
#covariates (left untouched), and the smoothed regional time effects.
#Improvements over the original: seq_along() instead of 1:length() (safe on an
#empty table), a single vector of labels instead of repeated rownames() calls,
#and startsWith()/nchar(prefix) instead of hand-counted substr() offsets.
row_labels<-rownames(sensitivity_anlys_wo_interp_full_table)
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"

for(i in seq_along(row_labels)){

  #covariates keep their original names
  if(!(row_labels[i] %in% covariates)){

    if(startsWith(row_labels[i], "State")){

      #state effects look like "StateMassachusetts": strip the leading "State"
      row_labels[i]<-substr(row_labels[i], start = 6, stop = nchar(row_labels[i]))

    }else if(row_labels[i] == "(Intercept)"){

      #the intercept corresponds to the baseline state, Alabama
      row_labels[i]<-"Intercept/Alabama"

    }else if(startsWith(row_labels[i], smooth_prefix)){

      #smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1";
      #drop the prefix and label them "Smoothed Time for Region <...>"
      row_labels[i]<-paste0("Smoothed Time for Region ",
                            substr(row_labels[i],
                                   start = nchar(smooth_prefix) + 1,
                                   stop = nchar(row_labels[i])))

    }
  }
}

#write the cleaned labels back onto the table
rownames(sensitivity_anlys_wo_interp_full_table)<-row_labels

#compute everything from a single summary() call: summary.gam() is expensive
#(it recomputes the parametric p-values each time), so hoist it instead of
#calling it five times below
wo_interp_model_summary<-summary(sensitivity_anlys_imputed_od_wo_interp)

#Wald 95% confidence intervals for the coefficients
sensitivity_anlys_wo_interp_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate - 1.96*wo_interp_model_summary$se
sensitivity_anlys_wo_interp_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate + 1.96*wo_interp_model_summary$se

#express the estimates and confidence bounds on the odds-ratio scale
sensitivity_anlys_wo_interp_full_table$Odds_Ratio<-exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate)
sensitivity_anlys_wo_interp_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_wo_interp_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
sensitivity_anlys_wo_interp_full_table$Standard_Error<-wo_interp_model_summary$se
#the smoothed time effects have no parametric p-value, so pad those rows with NA
sensitivity_anlys_wo_interp_full_table$p_value<-c(wo_interp_model_summary$p.pv,
                                                  rep(NA, length(coef(sensitivity_anlys_imputed_od_wo_interp)) -
                                                        length(wo_interp_model_summary$p.pv)))

#sanity-check the finished table: the top rows are the intercept and state effects
head(sensitivity_anlys_wo_interp_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama         -11.06009482            -11.19923007
## Alaska                      0.25068631              0.19663130
## Arizona                     0.30480530              0.27747665
## Arkansas                   -0.39548337             -0.43399804
## California                 -0.17028202             -0.19628379
## Colorado                    0.09282977              0.06139557
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama             -10.9209596 1.572758e-05  1.368473e-05
## Alaska                          0.3047413 1.284907e+00  1.217295e+00
## Arizona                         0.3321339 1.356361e+00  1.319795e+00
## Arkansas                       -0.3569687 6.733545e-01  6.479135e-01
## California                     -0.1442803 8.434269e-01  8.217790e-01
## Colorado                        0.1242640 1.097275e+00  1.063319e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  1.807538e-05     0.07098737  0.000000e+00
## Alaska             1.356274e+00     0.02757909  9.928986e-20
## Arizona            1.393940e+00     0.01394319 6.173866e-106
## Arkansas           6.997944e-01     0.01965034  4.366013e-90
## California         8.656451e-01     0.01326621  1.033639e-37
## Colorado           1.132315e+00     0.01603786  7.115952e-09
#...and the bottom rows are the smoothed regional time effects, which have no p-value
tail(sensitivity_anlys_wo_interp_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4           0.17042076              0.13453532
## Smoothed Time for Region West.5           0.04573535             -0.02191218
## Smoothed Time for Region West.6          -0.03879960             -0.13428255
## Smoothed Time for Region West.7          -0.03184967             -0.14455282
## Smoothed Time for Region West.8           0.01936577             -0.11853176
## Smoothed Time for Region West.9           0.18813733              0.07193732
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4              0.20630619  1.1858037
## Smoothed Time for Region West.5              0.11338287  1.0467973
## Smoothed Time for Region West.6              0.05668336  0.9619435
## Smoothed Time for Region West.7              0.08085348  0.9686522
## Smoothed Time for Region West.8              0.15726331  1.0195545
## Smoothed Time for Region West.9              0.30433734  1.2069993
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4     1.1440051      1.229129     0.01830890
## Smoothed Time for Region West.5     0.9783262      1.120061     0.03451404
## Smoothed Time for Region West.6     0.8743430      1.058321     0.04871579
## Smoothed Time for Region West.7     0.8654092      1.084212     0.05750161
## Smoothed Time for Region West.8     0.8882236      1.170304     0.07035588
## Smoothed Time for Region West.9     1.0745880      1.355726     0.05928572
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(sensitivity_anlys_wo_interp_full_table) %in% covariates)
sensitivity_anlys_wo_interp_covariate_table<-(round(sensitivity_anlys_wo_interp_full_table[covariate_Index,], 5))

#rename the covariates to cleaner display labels. Using a NAMED lookup (raw
#coefficient name -> display label) instead of assigning a positional vector of
#labels keeps the renaming correct even if the row order of the model
#coefficients ever changes.
display_labels<-c("Naloxone_Pharmacy_Yes_Redefined"  = "Naloxone_Pharmacy_Yes",
                  "Naloxone_Pharmacy_No_Redefined"   = "Naloxone_Pharmacy_No",
                  "Medical_Marijuana_Redefined"      = "Medical_Marijuana",
                  "Recreational_Marijuana_Redefined" = "Recreational_Marijuana",
                  "GSL_Redefined"                    = "GSL",
                  "PDMP_Redefined"                   = "PDMP",
                  "Medicaid_Expansion_Redefined"     = "Medicaid_Expansion",
                  "Intervention_Redefined"           = "Intervention_Redefined",
                  "num_states_w_intervention"        = "Number of States with Intervention")
rownames(sensitivity_anlys_wo_interp_covariate_table)<-unname(display_labels[rownames(sensitivity_anlys_wo_interp_covariate_table)])

#now, reorganize the data so that the covariates are on top and the rest of the
#variables are below (note: covariate rows are rounded to 5 digits while the
#remaining rows keep full precision, matching the original output)
sensitivity_anlys_wo_interp_covariate_table<-rbind(sensitivity_anlys_wo_interp_covariate_table, sensitivity_anlys_wo_interp_full_table[-covariate_Index,])
#keep only the odds-ratio-scale columns and the p-value
sensitivity_anlys_wo_interp_covariate_table<-sensitivity_anlys_wo_interp_covariate_table[,-which(colnames(sensitivity_anlys_wo_interp_covariate_table) %in%
                                                                                                             c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))]

colnames(sensitivity_anlys_wo_interp_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
#preview the covariate table (odds-ratio scale, labelled "Risk_Ratio" in the output)
head(sensitivity_anlys_wo_interp_covariate_table, 10)
##                                    Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                      9.751200e-01 9.604100e-01
## Naloxone_Pharmacy_No                       1.006860e+00 9.940900e-01
## Medical_Marijuana                          1.065190e+00 1.053450e+00
## Recreational_Marijuana                     9.634900e-01 9.473200e-01
## GSL                                        1.036290e+00 1.024910e+00
## PDMP                                       9.794600e-01 9.683800e-01
## Medicaid_Expansion                         1.105310e+00 1.092270e+00
## Intervention_Redefined                     1.071000e+00 1.059660e+00
## Number of States with Intervention         1.014360e+00 1.009570e+00
## Intercept/Alabama                          1.572758e-05 1.368473e-05
##                                     RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes              9.900500e-01 0.00116
## Naloxone_Pharmacy_No               1.019790e+00 0.29392
## Medical_Marijuana                  1.077050e+00 0.00000
## Recreational_Marijuana             9.799300e-01 0.00002
## GSL                                1.047790e+00 0.00000
## PDMP                               9.906800e-01 0.00035
## Medicaid_Expansion                 1.118520e+00 0.00000
## Intervention_Redefined             1.082460e+00 0.00000
## Number of States with Intervention 1.019170e+00 0.00000
## Intercept/Alabama                  1.807538e-05 0.00000
#save the table into a CSV (commented out so knitting does not overwrite the file)
# write.csv(round(sensitivity_anlys_wo_interp_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_wo_interp.csv")

7.3 Attributable Deaths

################ Sensitivity Analysis 3: Number of Attributable Deaths #############
#first, we subset the data so that we only focus on the rows (state x period) for
#which the state had at least a partial intervention effect
attr_deaths_anlys_wo_interp<-sensitivity_anlys_imputed_od_wo_interp_data[which(sensitivity_anlys_imputed_od_wo_interp_data$Intervention_Redefined>0),]

#counterfactual probability of overdose had the intervention not occurred:
#subtract the intervention effect from the observed log-odds, then invert
prob_od_no_int_wo_interpolation<-expit(-coef(sensitivity_anlys_imputed_od_wo_interp)["Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                     + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))

#Wald 95% CI bounds for all model coefficients; summary.gam() is expensive, so
#call it once and reuse the standard errors
wo_interp_coef_se<-summary(sensitivity_anlys_imputed_od_wo_interp)$se
coef_lb<-coef(sensitivity_anlys_imputed_od_wo_interp) - 1.96*wo_interp_coef_se
coef_ub<-coef(sensitivity_anlys_imputed_od_wo_interp) + 1.96*wo_interp_coef_se

#lower/upper bounds of the counterfactual probability, using the bounds of the
#intervention coefficient
prob_od_no_int_LB_wo_interp<-expit(-coef_lb[names(coef_lb) == "Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                        + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))

prob_od_no_int_UB_wo_interp<-expit(-coef_ub[names(coef_ub) == "Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                        + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))

#estimate the number of deaths attributable to the intervention
#BUG FIX: size the result vectors by the time periods present in the SUBSET the
#loop below iterates over, not by the time periods of the full data set; the
#original sizing leaves trailing NAs (and breaks the later data.frame() call)
#whenever some period has no state with an active intervention
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA_real_, length(unique(attr_deaths_anlys_wo_interp$Time_Period_ID)))


#for each six-month period (in time order), total the deaths attributable to the
#intervention: observed deaths minus the expected deaths had no intervention
#occurred, summed over every state in that period
time_points<-sort(unique(attr_deaths_anlys_wo_interp$Time_Period_ID))

for(index in seq_along(time_points)){

  #rows of the subset belonging to this time period
  rows<-which(attr_deaths_anlys_wo_interp$Time_Period_ID == time_points[index])

  observed_deaths<-attr_deaths_anlys_wo_interp$imputed_deaths_no_interp[rows]
  pop_at_risk<-attr_deaths_anlys_wo_interp$population[rows]

  #point estimate of attributable deaths for this period
  num_attr_od[index]<-sum(observed_deaths - prob_od_no_int_wo_interpolation[rows]*pop_at_risk)

  #bounds built from the CI bounds of the counterfactual probability
  num_attr_od_LB[index]<-sum(observed_deaths - prob_od_no_int_LB_wo_interp[rows]*pop_at_risk)
  num_attr_od_UB[index]<-sum(observed_deaths - prob_od_no_int_UB_wo_interp[rows]*pop_at_risk)
}

#assemble the per-period attributable-death estimates into one data frame;
#NOTE(review): pairing sort(unique(Time_Period_ID)) with sort(unique(Time_Period_Start))
#assumes period IDs increase monotonically with calendar time — confirm upstream
num_attr_od_wo_interpolation<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_wo_interp$Time_Period_ID)),
                                       "Time_Start" = sort(unique(attr_deaths_anlys_wo_interp$Time_Period_Start)),
                                       "Num_Attr_Deaths" = num_attr_od,
                                       "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                       "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_wo_interpolation$Num_Attr_Deaths)
## [1] 36986.53
#aggregate the excess deaths (and their CI bounds) by calendar year
yearly_num_Attr_Deaths_wo_interpolation<-num_attr_od_wo_interpolation %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

#distribution of the yearly attributable-death estimates
summary(yearly_num_Attr_Deaths_wo_interpolation$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   32.99  874.01 1558.61 1849.33 2550.54 4074.81

8 Sensitivity Analysis 4: Effect of DIH Prosecutions on Unintentional Overdose Deaths, Assuming Effect Lasts for Two Years

8.1 Analysis

########### Sensitivity Analysis 4: Two Year Intervention Effect ######################
#create a plot for each state to see how many prosecution media alerts there are per 6 month period
#read in the prosecution media alert data
prosecution_data<-read.csv("./Data/dih_prosecutions_9_6_21.csv")

#data cleaning:
# - parse the charge date (stored as m/d/Y text) into a Date
# - normalize two known misspellings of the state name ("pennsylvania", "Virginia " with a
#   trailing space)
# - drop rows with an unparseable date, an unknown state ("No Info" variants), or a blank state
#   (note: filter() also drops rows where State.Filed is NA, since != yields NA there)
prosecution_data<-prosecution_data %>% 
  mutate(Date = as.Date(Date.charged, "%m/%d/%Y")) %>%
  mutate(State = ifelse(State.Filed == "pennsylvania", "Pennsylvania", State.Filed),
         State = ifelse(State.Filed == "Virginia ", "Virginia", State)) %>%
  filter(!is.na(Date), State.Filed != "No Info", State.Filed != "No info", State.Filed != "No Info ",
         State != "")

#clean up the data by looking at the link to the article:
#one record has a mistyped charge date ("2026-08-01"); per the linked article it should be 2016-02-15
prosecution_data$Date[prosecution_data$Date == "2026-08-01"] <- as.Date("2016-02-15", "%Y-%m-%d")

#change the states into Character instead of factor
prosecution_data$State<-as.character(prosecution_data$State)
#tabulate the number of prosecution media alerts per state (output printed below)
table(prosecution_data$State)
   Alabama         Alaska        Arizona       Arkansas     California 
        12              8              9              4             76 
  Colorado    Connecticut       Delaware        Florida        Georgia 
        32             47              3            138             29 
     Idaho       Illinois        Indiana           Iowa         Kansas 
         9            342             55             31              9 
  Kentucky      Louisiana          Maine       Maryland  Massachusetts 
        43             65             17             63             34 
  Michigan      Minnesota    Mississippi       Missouri        Montana 
       116            140              1             45             11 
  Nebraska         Nevada  New Hampshire     New Jersey     New Mexico 
         1             13             42            137              4 
  New York North Carolina   North Dakota           Ohio       Oklahoma 
       110            124             53            404             41 
    Oregon   Pennsylvania   Rhode Island South Carolina   South Dakota 
        19            726              2             12             13 
 Tennessee          Texas           Utah        Vermont       Virginia 
        94             44             21             13             63 
Washington  West Virginia      Wisconsin        Wyoming 
        78             33            381             19 
#change date charged into Date object
#BUG FIX: mdy(Date.charged) re-parses the raw text column, which resurrects the
#mistyped "2026-08-01" record that was manually corrected above (and the year<2020
#filter below would then silently drop that prosecution). Re-apply the correction
#after parsing so the record lands in its true 2016 period.
prosecution_data$Date<-mdy(prosecution_data$Date.charged)
prosecution_data$Date[prosecution_data$Date == "2026-08-01"] <- as.Date("2016-02-15", "%Y-%m-%d")

#group the data into six month periods (floor each charge date to Jan 1 / Jul 1)
prosecution_data<-prosecution_data %>% mutate(six_month_pd = lubridate::floor_date(Date , "6 months" ))

#for each state and six-month period, record the first and last prosecution date;
#restrict to the 2000-2019 study window
prosecution_data_by_six_month_pd <- prosecution_data %>%
  filter(year(six_month_pd)>1999 & year(six_month_pd)<2020) %>%
  group_by(State, six_month_pd) %>%
  summarise(first_date_in_pd = min(Date), last_date_in_pd = max(Date))

#create the data set used for this sensitivity analysis
#first, we merge the grouped prosecution data set with the main data set by state and
#time period; all = TRUE keeps main-data rows that had no prosecution in a period
#(their first/last_date_in_pd become NA) as well as any prosecution-only rows
sensitivity_anlys_redefine_int_data<-merge(main_analysis_data,
                                           prosecution_data_by_six_month_pd, 
                                           by.x = c("State", "Time_Period_Start"),
                                           by.y = c("State", "six_month_pd"), all = TRUE)

#initialize the 2-year-effect intervention variable to 0; the original wrapped this
#in group_by(State), but a constant assignment does not depend on grouping and the
#persistent grouping it left behind is an easy source of downstream surprises
sensitivity_anlys_redefine_int_data$int_2_yr_effect<-0

#change the period boundary columns into Date objects
sensitivity_anlys_redefine_int_data$Time_Period_Start<-as.Date(sensitivity_anlys_redefine_int_data$Time_Period_Start)
sensitivity_anlys_redefine_int_data$Time_Period_End<-as.Date(sensitivity_anlys_redefine_int_data$Time_Period_End)

#we need to impute the newly defined intervention variable depending on the case
#by examining each row of the data set.
#NOTE(review): this loop assumes each state's rows are ordered by time and that every
#state has at least 5 rows; merge() sorts by the by-columns by default, which should
#guarantee the ordering — confirm if the merge call is ever changed
for(state in unique(sensitivity_anlys_redefine_int_data$State)){
  #first, subset the data set into state_data which only contains the data for the state
  state_index<-which(sensitivity_anlys_redefine_int_data$State == state)
  state_data<-sensitivity_anlys_redefine_int_data[state_index,]

  #note that the first four rows of the 2 year effect intervention variable are the same as the
  #first four rows of the original intervention variable
  #(there is no period 2 years earlier whose effect could carry forward into them)
  state_data$int_2_yr_effect[1:4]<-state_data$Intervention_Redefined[1:4]

  for(i in 5:nrow(state_data)){
    #next, we deal with the rows where there was at least one prosecution in the last 3 six month periods
    #These rows will be imputed with a 1 (a prosecution in any of periods i-1..i-3 covers all of period i)
    if((!is.na(state_data$first_date_in_pd[i - 1]) |
        !is.na(state_data$first_date_in_pd[i - 2]) |
        !is.na(state_data$first_date_in_pd[i - 3]))){

      state_data$int_2_yr_effect[i]<-1

    }else{
      #next, we account for the rows with the fractions:
      # 1) an intervention occurs in row i without an intervention 2 years ago
      # 2) row i contains the lasting effects of an intervention that occurred 2 years ago
      # 3) row i contains effects from both a new intervention starting in row i and lasting
      # effects from 2 years ago

      #To compute the fraction, we add the number of days that are affected by an intervention
      #(from both the current prosecution and previous prosecution) and then divide by the total
      #number of days in the period:

      total_len_of_pd<-as.numeric(state_data$Time_Period_End[i] - state_data$Time_Period_Start[i])

      #If there is no prosecution two years ago, i.e. in period i-4, then the last_date is the first
      #date in period i. We subtract the last_date by the first date in the period, so we will get
      #a 0 for the number of days that are affected by a prosecution from period i-4. Otherwise,
      #the last_date is the last date of prosecution from period i-4, plus 2 years.
      #(ifelse() drops the difftime class here, leaving a plain number of days)
      len_of_past_effect <- ifelse(!is.na(state_data$first_date_in_pd[i - 4]),
                                   (state_data$last_date_in_pd[i - 4] + years(2)) - state_data$Time_Period_Start[i],
                                   0)

      #If there is no prosecution in the period i, then the start_date is the last date in the period i.
      #We subtract start_date from the last date in period i, so we will get a 0 for the number
      #of days that are affected by a prosecution in period i. Otherwise, the start_date is the
      #first date of a prosecution in period i.
      len_of_current_effect <- ifelse(!is.na(state_data$first_date_in_pd[i]),
                                      as.numeric(state_data$Time_Period_End[i] - state_data$first_date_in_pd[i]),
                                      0)

      #fraction of the period covered by any (past or current) intervention effect
      state_data$int_2_yr_effect[i]<-(len_of_past_effect + len_of_current_effect)/total_len_of_pd
    }
  }

  #for the case where the int_2_yr_effect is greater than 1 (could result when we add the effects of
  #previous intervention and the current intervention), we just impute a 1 instead
  state_data$int_2_yr_effect[state_data$int_2_yr_effect>1]<-1

  #lastly, we store the int_2_yr_effect variable into the sensitivity analysis data set
  sensitivity_anlys_redefine_int_data$int_2_yr_effect[state_index]<-state_data$int_2_yr_effect
}

#view the data set just to make sure the imputation looks right (interactive check only)
# View(sensitivity_anlys_redefine_int_data %>% select(State, Time_Period_Start, Time_Period_End,
#                                                     Intervention_Redefined, first_date_in_pd,
#                                                     last_date_in_pd,
#                                                     int_2_yr_effect))


#for each six-month period, count how many states have an active (possibly fractional)
#2-year intervention effect; note this leaves the frame grouped by Time_Period_Start
sensitivity_anlys_redefine_int_data <- sensitivity_anlys_redefine_int_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention_2_yr_effect = sum(int_2_yr_effect))

#run the analysis on the sensitivity analysis data:
#binomial GAM of (deaths, survivors) on state fixed effects, a cubic-regression-spline
#time smooth per Census region, the policy covariates, the 2-year intervention effect,
#and the number of states with an active intervention
sensitivity_anlys_redefine_int_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                                            s(Time_Period_ID, bs = "cr", by = as.factor(Region))  +
                                            Naloxone_Pharmacy_Yes_Redefined +
                                            Naloxone_Pharmacy_No_Redefined +
                                            Medical_Marijuana_Redefined +
                                            Recreational_Marijuana_Redefined +
                                            GSL_Redefined +
                                            PDMP_Redefined +
                                            Medicaid_Expansion_Redefined +
                                            int_2_yr_effect + 
                                            num_states_w_intervention_2_yr_effect,
                                          data = sensitivity_anlys_redefine_int_data, family = "binomial")

#render the coefficient table (HTML) for the report
stargazer(sensitivity_anlys_redefine_int_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateAlaska 0.236***
(0.028)
StateArizona 0.304***
(0.014)
StateArkansas -0.375***
(0.020)
StateCalifornia -0.168***
(0.013)
StateColorado 0.082***
(0.016)
StateConnecticut 0.186***
(0.016)
StateDelaware 0.427***
(0.022)
StateFlorida 0.265***
(0.012)
StateGeorgia -0.069***
(0.013)
StateHawaii -0.237***
(0.025)
StateIdaho -0.168***
(0.024)
StateIllinois -0.025*
(0.013)
StateIndiana 0.073***
(0.014)
StateIowa -0.743***
(0.021)
StateKansas -0.324***
(0.019)
StateKentucky 0.634***
(0.014)
StateLouisiana 0.285***
(0.014)
StateMaine 0.163***
(0.022)
StateMaryland -1.067***
(0.019)
StateMassachusetts 0.222***
(0.014)
StateMichigan -0.028**
(0.014)
StateMinnesota -0.634***
(0.017)
StateMississippi -0.102***
(0.018)
StateMissouri 0.195***
(0.015)
StateMontana -0.350***
(0.029)
StateNebraska -0.877***
(0.029)
StateNevada 0.436***
(0.017)
StateNew Hampshire 0.266***
(0.020)
StateNew Jersey 0.110***
(0.013)
StateNew Mexico 0.629***
(0.017)
StateNew York -0.236***
(0.013)
StateNorth Carolina 0.176***
(0.013)
StateNorth Dakota -1.062***
(0.045)
StateOhio 0.450***
(0.012)
StateOklahoma 0.370***
(0.015)
StateOregon -0.195***
(0.018)
StatePennsylvania 0.441***
(0.012)
StateRhode Island 0.261***
(0.022)
StateSouth Carolina 0.218***
(0.015)
StateSouth Dakota -0.979***
(0.043)
StateTennessee 0.446***
(0.013)
StateTexas -0.223***
(0.012)
StateUtah 0.021
(0.018)
StateVermont -0.141***
(0.031)
StateVirginia -0.111***
(0.014)
StateWashington 0.063***
(0.015)
StateWest Virginia 0.886***
(0.015)
StateWisconsin -0.039***
(0.015)
StateWyoming 0.021
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.034***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.003
(0.007)
Medical_Marijuana_Redefined 0.068***
(0.006)
Recreational_Marijuana_Redefined -0.039***
(0.009)
GSL_Redefined 0.037***
(0.006)
PDMP_Redefined -0.016***
(0.006)
Medicaid_Expansion_Redefined 0.103***
(0.006)
int_2_yr_effect 0.057***
(0.005)
num_states_w_intervention_2_yr_effect -0.002**
(0.001)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -10.576***
(0.028)
Observations 2,000
Adjusted R2 0.914
Log Likelihood -16,739.990
UBRE 8.829
Note: *p<0.1; **p<0.05; ***p<0.01

8.2 Plots

#draw the estimated smooth terms of the model (one panel per regional time smooth)
plot(sensitivity_anlys_redefine_int_model)

8.3 Compile Results

############## Sensitivity Analysis 4: Make Data Frame of Results and 95% CI ##########
#collect the fitted model coefficients into a one-column data frame
sensitivity_anlys_redefine_int_full_table<-data.frame(coef(sensitivity_anlys_redefine_int_model))
#inspect the first few rows to confirm the structure
head(sensitivity_anlys_redefine_int_full_table)
##                 coef.sensitivity_anlys_redefine_int_model.
## (Intercept)                                   -10.57596361
## StateAlaska                                     0.23607377
## StateArizona                                    0.30392085
## StateArkansas                                  -0.37525216
## StateCalifornia                                -0.16802202
## StateColorado                                   0.08162927
#give the single column a descriptive name
colnames(sensitivity_anlys_redefine_int_full_table)<-"Coefficient_Estimate"

#names of the policy covariates as they appear in the model coefficients for this
#sensitivity analysis (the 2-year-effect versions of the intervention variables)
covariates<-c(
  "Naloxone_Pharmacy_Yes_Redefined",
  "Naloxone_Pharmacy_No_Redefined",
  "Medical_Marijuana_Redefined",
  "Recreational_Marijuana_Redefined",
  "GSL_Redefined",
  "PDMP_Redefined",
  "Medicaid_Expansion_Redefined",
  "int_2_yr_effect",
  "num_states_w_intervention_2_yr_effect"
)

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#for each row in the main analysis table
#rename the regression coefficient labels so they read cleanly in the output
#table; there are 3 kinds of coefficients: state fixed effects, the policy
#covariates (left untouched), and the smoothed regional time effects.
#Improvements over the original: seq_along() instead of 1:length() (safe on an
#empty table), a single vector of labels instead of repeated rownames() calls,
#and startsWith()/nchar(prefix) instead of hand-counted substr() offsets.
coef_labels<-rownames(sensitivity_anlys_redefine_int_full_table)
smooth_term_prefix<-"s(Time_Period_ID):as.factor(Region)"

for(i in seq_along(coef_labels)){

  #covariates keep their original names
  if(!(coef_labels[i] %in% covariates)){

    if(startsWith(coef_labels[i], "State")){

      #state effects look like "StateMassachusetts": strip the leading "State"
      coef_labels[i]<-substr(coef_labels[i], start = 6, stop = nchar(coef_labels[i]))

    }else if(coef_labels[i] == "(Intercept)"){

      #the intercept corresponds to the baseline state, Alabama
      coef_labels[i]<-"Intercept/Alabama"

    }else if(startsWith(coef_labels[i], smooth_term_prefix)){

      #smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1";
      #drop the prefix and label them "Smoothed Time for Region <...>"
      coef_labels[i]<-paste0("Smoothed Time for Region ",
                             substr(coef_labels[i],
                                    start = nchar(smooth_term_prefix) + 1,
                                    stop = nchar(coef_labels[i])))

    }
  }
}

#write the cleaned labels back onto the table
rownames(sensitivity_anlys_redefine_int_full_table)<-coef_labels

#compute everything from a single summary() call: summary.gam() is expensive
#(it recomputes the parametric p-values each time), so hoist it instead of
#calling it five times below
redefine_int_model_summary<-summary(sensitivity_anlys_redefine_int_model)

#Wald 95% confidence intervals for the coefficients
sensitivity_anlys_redefine_int_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate - 1.96*redefine_int_model_summary$se
sensitivity_anlys_redefine_int_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate + 1.96*redefine_int_model_summary$se

#express the estimates and confidence bounds on the odds-ratio scale
sensitivity_anlys_redefine_int_full_table$Odds_Ratio<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate)
sensitivity_anlys_redefine_int_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_redefine_int_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
sensitivity_anlys_redefine_int_full_table$Standard_Error<-redefine_int_model_summary$se
#the smoothed time effects have no parametric p-value, so pad those rows with NA
sensitivity_anlys_redefine_int_full_table$p_value<-c(redefine_int_model_summary$p.pv,
                                                     rep(NA, length(coef(sensitivity_anlys_redefine_int_model)) -
                                                           length(redefine_int_model_summary$p.pv)))

#inspect the finished table
head(sensitivity_anlys_redefine_int_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama         -10.57596361            -10.63016920
## Alaska                      0.23607377              0.18213815
## Arizona                     0.30392085              0.27662076
## Arkansas                   -0.37525216             -0.41405152
## California                 -0.16802202             -0.19398457
## Colorado                    0.08162927              0.05026134
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama             -10.5217580 2.552216e-05  2.417554e-05
## Alaska                          0.2900094 1.266268e+00  1.199780e+00
## Arizona                         0.3312209 1.355162e+00  1.318666e+00
## Arkansas                       -0.3364528 6.871160e-01  6.609669e-01
## California                     -0.1420595 8.453352e-01  8.236706e-01
## Colorado                        0.1129972 1.085053e+00  1.051546e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  2.694378e-05     0.02765591  0.000000e+00
## Alaska             1.336440e+00     0.02751817  9.584115e-18
## Arizona            1.392667e+00     0.01392862 1.502265e-105
## Arkansas           7.142996e-01     0.01979559  3.913869e-80
## California         8.675697e-01     0.01324620  7.203055e-37
## Colorado           1.119629e+00     0.01600405  3.386861e-07
#inspect the bottom of the table: smoothed time effect rows (NA p-values are expected here)
tail(sensitivity_anlys_redefine_int_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.2615087               0.2405179
## Smoothed Time for Region West.5            0.2690181               0.2375173
## Smoothed Time for Region West.6            0.2780697               0.2400095
## Smoothed Time for Region West.7            0.3640020               0.3156273
## Smoothed Time for Region West.8            0.5011084               0.4431453
## Smoothed Time for Region West.9            0.5925780               0.5371732
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.2824996   1.298888
## Smoothed Time for Region West.5               0.3005190   1.308679
## Smoothed Time for Region West.6               0.3161298   1.320578
## Smoothed Time for Region West.7               0.4123766   1.439077
## Smoothed Time for Region West.8               0.5590715   1.650550
## Smoothed Time for Region West.9               0.6479829   1.808645
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.271908      1.326441     0.01070961
## Smoothed Time for Region West.5      1.268097      1.350560     0.01607187
## Smoothed Time for Region West.6      1.271261      1.371808     0.01941846
## Smoothed Time for Region West.7      1.371119      1.510403     0.02468092
## Smoothed Time for Region West.8      1.557599      1.749048     0.02957300
## Smoothed Time for Region West.9      1.711163      1.911681     0.02826777
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(sensitivity_anlys_redefine_int_full_table) %in% covariates)
sensitivity_anlys_redefine_int_covariate_table<-(round(sensitivity_anlys_redefine_int_full_table[covariate_Index,], 5))

#rename the variables so that the table looks cleaner
rownames(sensitivity_anlys_redefine_int_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                                            "Naloxone_Pharmacy_No",
                                                            "Medical_Marijuana",
                                                            "Recreational_Marijuana",
                                                            "GSL", 
                                                            "PDMP", 
                                                            "Medicaid_Expansion",
                                                            "Two Year Intervention Effect", 
                                                            "Number of States w Intervention")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
#(note: the covariate rows were rounded to 5 digits above; the remaining rows stay unrounded)
sensitivity_anlys_redefine_int_covariate_table<-rbind(sensitivity_anlys_redefine_int_covariate_table, sensitivity_anlys_redefine_int_full_table[-covariate_Index,])
#keep only the odds-ratio-scale columns and the p-value
#select the kept columns by name rather than with -which(): if the column names ever
#changed, -integer(0) would silently drop EVERY column instead of raising an error
odds_ratio_cols<-setdiff(colnames(sensitivity_anlys_redefine_int_covariate_table),
                         c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))
sensitivity_anlys_redefine_int_covariate_table<-sensitivity_anlys_redefine_int_covariate_table[, odds_ratio_cols]

#NOTE(review): the model is logistic, so exp(coef) is an odds ratio; labelling it Risk_Ratio
#presumably relies on the rare-outcome approximation OR ~ RR — confirm intended terminology
colnames(sensitivity_anlys_redefine_int_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_redefine_int_covariate_table, 10)
##                                 Risk_Ratio_Estimates  RR_95_CI_LB  RR_95_CI_UB
## Naloxone_Pharmacy_Yes                   9.662600e-01 9.516100e-01 9.811400e-01
## Naloxone_Pharmacy_No                    1.003040e+00 9.902800e-01 1.015960e+00
## Medical_Marijuana                       1.070650e+00 1.058830e+00 1.082600e+00
## Recreational_Marijuana                  9.619700e-01 9.458300e-01 9.783900e-01
## GSL                                     1.037200e+00 1.025810e+00 1.048720e+00
## PDMP                                    9.836600e-01 9.725300e-01 9.949100e-01
## Medicaid_Expansion                      1.108410e+00 1.095370e+00 1.121610e+00
## Two Year Intervention Effect            1.059060e+00 1.049340e+00 1.068860e+00
## Number of States w Intervention         9.975600e-01 9.955400e-01 9.995800e-01
## Intercept/Alabama                       2.552216e-05 2.417554e-05 2.694378e-05
##                                 p-value
## Naloxone_Pharmacy_Yes           0.00001
## Naloxone_Pharmacy_No            0.64209
## Medical_Marijuana               0.00000
## Recreational_Marijuana          0.00001
## GSL                             0.00000
## PDMP                            0.00453
## Medicaid_Expansion              0.00000
## Two Year Intervention Effect    0.00000
## Number of States w Intervention 0.01796
## Intercept/Alabama               0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_redefine_int_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_redefine_int.csv")

8.4 Attributable Deaths

################ Sensitivity Analysis 4: Number of Attributable Deaths ################
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_redefine_int<-sensitivity_anlys_redefine_int_data[which(sensitivity_anlys_redefine_int_data$int_2_yr_effect>0),]

#baseline log-odds of overdose implied by the observed deaths (reused three times below)
logit_obs_redefine_int<-logit(attr_deaths_anlys_redefine_int$imputed_deaths/attr_deaths_anlys_redefine_int$population)

#compute the probability of overdose had intervention not occurred:
#remove the intervention term from the linear predictor, then transform back with expit
prob_od_no_int_redefine_int<-expit(-coef(sensitivity_anlys_redefine_int_model)["int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
                                   + logit_obs_redefine_int)

#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#summary.gam() is expensive, so call it once and reuse the standard errors
redefine_int_model_se<-summary(sensitivity_anlys_redefine_int_model)$se
coef_lb<-coef(sensitivity_anlys_redefine_int_model) - 1.96*redefine_int_model_se
coef_ub<-coef(sensitivity_anlys_redefine_int_model) + 1.96*redefine_int_model_se

#NOTE: because the coefficient enters with a minus sign, the "LB" probability below is
#numerically the LARGER of the two; the labels are chosen so that the downstream death
#counts (observed - prob*population) end up with the correct lower/upper bounds
prob_od_no_int_LB_redefine_int<-expit(-coef_lb["int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
                                      + logit_obs_redefine_int)

prob_od_no_int_UB_redefine_int<-expit(-coef_ub["int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
                                      + logit_obs_redefine_int)

#estimate the number of deaths attributable to the intervention
#first, initialize the vectors that store the point estimates and the CI bounds
#BUG FIX: size the vectors by the time periods present in the intervention *subset*
#(attr_deaths_anlys_redefine_int), not the full sensitivity data set; if those two
#counts ever differed, the original left trailing NAs and the data.frame() construction
#below would fail on mismatched column lengths
time_ids_redefine_int<-sort(unique(attr_deaths_anlys_redefine_int$Time_Period_ID))
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(time_ids_redefine_int))


#for each time period, we first find the indices of rows containing data from that time point
#then, we find the total number of deaths attributable to the intervention

index<-1 #keep track of where to store the values in the vector

for(time in time_ids_redefine_int){
  #find the indices of rows where the time point = time
  time_point_index<-which(attr_deaths_anlys_redefine_int$Time_Period_ID == time)

  #attributable deaths = observed deaths with intervention - estimated deaths had intervention not occurred
  num_attr_od[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
                          - prob_od_no_int_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])

  #find the lower and upper bounds of the estimated number of deaths attributable to the intervention
  num_attr_od_LB[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
                             - prob_od_no_int_LB_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])
  num_attr_od_UB[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
                             - prob_od_no_int_UB_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])


  index<-index + 1
}

#assemble one row per time period: the period ID, its start date, and the point
#estimate plus 95% CI bounds of deaths attributable to the intervention
#(relies on the sorted unique IDs and start dates lining up one-to-one)
attr_period_ids<-sort(unique(attr_deaths_anlys_redefine_int$Time_Period_ID))
attr_period_starts<-sort(unique(attr_deaths_anlys_redefine_int$Time_Period_Start))
num_attr_od_redefine_int<-data.frame(Time_Period_ID = attr_period_ids,
                                     Time_Start = attr_period_starts,
                                     Num_Attr_Deaths = num_attr_od,
                                     Num_Attr_Deaths_LB = num_attr_od_LB,
                                     Num_Attr_Deaths_UB = num_attr_od_UB)

#total excess deaths attributable to the intervention across all periods
sum(num_attr_od_redefine_int$Num_Attr_Deaths)
## [1] 28065.24
#total the excess deaths per calendar year (of the period start date),
#carrying the CI bounds through the same aggregation
yearly_num_Attr_Deaths_redefine_int <- num_attr_od_redefine_int %>%
  group_by(year = year(Time_Start)) %>%
  summarise(deaths = sum(Num_Attr_Deaths),
            death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

#quick distributional check of the yearly totals
summary(yearly_num_Attr_Deaths_redefine_int$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   27.72  492.61 1181.11 1403.26 1984.69 3349.26
#plot the yearly attributable deaths over time (line with a point at each year)
ggplot(yearly_num_Attr_Deaths_redefine_int, aes(x = year, y = deaths)) + geom_line() + geom_point()

9 Sensitivity Analysis 5: Effect of Lagged DIH Prosecutions on Unintentional Overdose Deaths, Assuming Effect Lasts for Two Years

9.1 Analysis

############# Sensitivity Analysis 5: Two Year Effect Lagged by 6 months ######################
#create a plot for each state to see how many prosecution media alerts there are per 6 month period
#read in the prosecution media alert data
prosecution_data<-read.csv("./Data/dih_prosecutions_9_6_21.csv")

#data cleaning: parse the charge date, normalize two known state-name entry errors
#(lowercase "pennsylvania" and "Virginia " with a trailing space), and drop rows with
#no usable date or state
#NOTE(review): only the exact strings "No Info", "No info" and "No Info " are excluded —
#confirm the raw file contains no other spelling variants
prosecution_data<-prosecution_data %>% 
  mutate(Date = as.Date(Date.charged, "%m/%d/%Y")) %>%
  mutate(State = ifelse(State.Filed == "pennsylvania", "Pennsylvania", State.Filed),
         State = ifelse(State.Filed == "Virginia ", "Virginia", State)) %>%
  filter(!is.na(Date), State.Filed != "No Info", State.Filed != "No info", State.Filed != "No Info ",
         State != "")

#hand-correct one date typo found by checking the linked article (2026 -> 2016)
prosecution_data$Date[prosecution_data$Date == "2026-08-01"] <- as.Date("2016-02-15", "%Y-%m-%d")

#change the states into Character instead of factor
#(a no-op under R >= 4.0 where read.csv defaults to stringsAsFactors = FALSE, kept for safety)
prosecution_data$State<-as.character(prosecution_data$State)
#see how many prosecution data points there are for each state
table(prosecution_data$State)
   Alabama         Alaska        Arizona       Arkansas     California 
        12              8              9              4             76 
  Colorado    Connecticut       Delaware        Florida        Georgia 
        32             47              3            138             29 
     Idaho       Illinois        Indiana           Iowa         Kansas 
         9            342             55             31              9 
  Kentucky      Louisiana          Maine       Maryland  Massachusetts 
        43             65             17             63             34 
  Michigan      Minnesota    Mississippi       Missouri        Montana 
       116            140              1             45             11 
  Nebraska         Nevada  New Hampshire     New Jersey     New Mexico 
         1             13             42            137              4 
  New York North Carolina   North Dakota           Ohio       Oklahoma 
       110            124             53            404             41 
    Oregon   Pennsylvania   Rhode Island South Carolina   South Dakota 
        19            726              2             12             13 
 Tennessee          Texas           Utah        Vermont       Virginia 
        94             44             21             13             63 
Washington  West Virginia      Wisconsin        Wyoming 
        78             33            381             19 
#NOTE: Date was already parsed from Date.charged above and one typo was hand-corrected
#(2026-08-01 -> 2016-02-15); the original re-parsed it here with mdy(Date.charged),
#which silently reverted that correction. Keep the cleaned Date column as-is.

#group the data into six month periods
prosecution_data<-prosecution_data %>% mutate(six_month_pd = lubridate::floor_date(Date , "6 months" ))

#count the number of prosecution media alerts in each six month period,
#restricted to the 2000-2019 study window
#we also get the first and last date of prosecution in the time period
prosecution_data_by_six_month_pd <- prosecution_data %>%
  filter(year(six_month_pd)>1999 & year(six_month_pd)<2020) %>%
  group_by(State, six_month_pd) %>%
  summarise(first_date_in_pd = min(Date), last_date_in_pd = max(Date))

#create the data set used for this sensitivity analysis
#first, we merge the grouped prosecution data set with the main data set by state and time period
#(all = TRUE keeps state/period rows with no prosecutions; their first/last dates are NA)
sensitivity_anlys_2yr_int_lag<-merge(main_analysis_data,
                                     prosecution_data_by_six_month_pd,
                                     by.x = c("State", "Time_Period_Start"),
                                     by.y = c("State", "six_month_pd"), all = TRUE)

#initialize the 2-year lagged intervention effect to 0 for every row
#(plain column assignment: the original group_by(State) %>% mutate(... = 0) needlessly
#left the frame grouped, since a constant does not depend on the grouping)
sensitivity_anlys_2yr_int_lag$int_2_yr_effect_lag<-0

#change the date columns into Date objects
sensitivity_anlys_2yr_int_lag$Time_Period_Start<-as.Date(sensitivity_anlys_2yr_int_lag$Time_Period_Start)
sensitivity_anlys_2yr_int_lag$Time_Period_End<-as.Date(sensitivity_anlys_2yr_int_lag$Time_Period_End)

#we need to calculate the 2 year intervention variable depending on the case
#by examining each row of the data set
for(state in unique(sensitivity_anlys_2yr_int_lag$State)){
  #first, subset the data set into state_data which only contains the data for the state
  state_index<-which(sensitivity_anlys_2yr_int_lag$State == state)
  state_data<-sensitivity_anlys_2yr_int_lag[state_index,]

  #seq_len() instead of 1:(nrow-1): if a state ever had a single row, 1:0 would
  #iterate twice with invalid indices, while seq_len(0) correctly iterates zero times
  for(i in seq_len(nrow(state_data) - 1)){
    #for the states that had at least one prosecution in the first 2 years of analysis period,
    #we impute the intervention variable based on the intervention variable of main analysis:
    #if intervention occurred in time i, then for the 6-month lagged effect, we compute the
    #proportion of days affected by intervention, taking into account the 6 month lag. Else,
    #if the intervention had occurred by time i, we impute a 1 in the next six-month interval
    #since lagged
    if(i %in% c(1:4)){
      if(state_data$Intervention_Redefined[i] > 0 & state_data$Intervention_Redefined[i] < 1){
        state_data$int_2_yr_effect_lag[i + 1] <- as.numeric(state_data$Time_Period_End[i + 1] - (state_data$first_date_in_pd[i] %m+% months(6)))/as.numeric(state_data$Time_Period_End[i + 1] - state_data$Time_Period_Start[i + 1])
      }else if(state_data$Intervention_Redefined[i] == 1){
        state_data$int_2_yr_effect_lag[i + 1] <- 1
      }

      #next, if there was at least one prosecution in the last 3 six-month periods (i.e. 1.5 years) before time i
      #These rows will be imputed with a 1 in the next six-month interval since lagged
    }else if((!is.na(state_data$first_date_in_pd[i - 1]) |
              !is.na(state_data$first_date_in_pd[i - 2]) |
              !is.na(state_data$first_date_in_pd[i - 3]))){

      state_data$int_2_yr_effect_lag[i + 1]<-1

    }else{
      #next, we account for the rows with the fractions:
      # 1) an intervention occurs in row i without an intervention 2 years ago
      # 2) row i contains the lasting effects of an intervention that occurred 2 years ago
      # 3) row i contains effects from both a new intervention starting in row i and lasting
      # effects from 2 years ago

      #To compute the fraction, we add the number of days that are affected by an intervention
      #(from both the current prosecution and previous prosecution) and then divide by the total
      #number of days in the period:

      #first, we compute the number of days in the period of time interval i + 1
      total_len_of_pd<-as.numeric(state_data$Time_Period_End[i + 1] - state_data$Time_Period_Start[i + 1])

      #If there is no prosecution two years ago, i.e. in period i-4, then the last_date is the first
      #date in period i + 1. We subtract the last_date by the first date in period i + 1, so we will get
      #a 0 for the number of days that are affected by a prosecution from period i-4. Otherwise,
      #the last_date is the last date of prosecution from period i-4, plus 2 years.
      #The start time is the first date of period i + 1

      len_of_past_effect <- ifelse(!is.na(state_data$first_date_in_pd[i - 4]),
                                   as.numeric((as.Date(state_data$last_date_in_pd[i - 4] + years(2), format = "%Y-%m-%d") %m+% months(6)) - state_data$Time_Period_Start[i + 1]),
                                   0)

      #If there is no prosecution in the period i, then the start_date is the last date in period i + 1 (because lagged effect).
      #We subtract start_date from the last date in period i + 1, so we will get a 0 for the number
      #of days that are affected by a prosecution in period i. Otherwise, the start_date is the
      #first date of a prosecution in period i + 6 months. The end date is the last date in period i + 1.

      len_of_current_effect <- ifelse(!is.na(state_data$first_date_in_pd[i]),
                                      as.numeric(state_data$Time_Period_End[i + 1] - (state_data$first_date_in_pd[i] %m+% months(6))),
                                      0)

      state_data$int_2_yr_effect_lag[i + 1]<-(len_of_past_effect + len_of_current_effect)/total_len_of_pd
    }
  }

  #for the case where the int_2_yr_effect is greater than 1 (could result when we add the effects of
  #previous intervention and the current intervention), we just impute a 1 instead
  state_data$int_2_yr_effect_lag[state_data$int_2_yr_effect_lag>1]<-1

  #lastly, we store the int_2_yr_effect variable into the sensitivity analysis data set
  sensitivity_anlys_2yr_int_lag$int_2_yr_effect_lag[state_index]<-state_data$int_2_yr_effect_lag
}

#per time period, count how many states have the (lagged) intervention in effect;
#fractional effects contribute fractionally. ungroup() afterwards so the grouping
#does not silently persist into later operations (e.g. the gam() fit below)
sensitivity_anlys_2yr_int_lag <- sensitivity_anlys_2yr_int_lag %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention_2yr_lag = sum(int_2_yr_effect_lag)) %>%
  ungroup()
#view the data set just to make sure the imputation looks right
# View(sensitivity_anlys_2yr_int_lag %>% select(State, Time_Period_Start, Time_Period_End,
#                                                     Intervention_Redefined, first_date_in_pd,
#                                                     last_date_in_pd,
#                                                     int_2_yr_effect_lag))


#run the analysis for all the states
#binomial GAM of unintentional overdose deaths vs survivors: state fixed effects,
#region-specific cubic-regression-spline time trends (bs = "cr", one smooth per Region),
#the redefined policy covariates, the 6-month-lagged two-year intervention effect,
#and the number of states with the intervention in the same period
lagged_analysis_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                             s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                             Naloxone_Pharmacy_Yes_Redefined +
                             Naloxone_Pharmacy_No_Redefined +
                             Medical_Marijuana_Redefined +
                             Recreational_Marijuana_Redefined +
                             GSL_Redefined +
                             PDMP_Redefined +
                             Medicaid_Expansion_Redefined +
                             int_2_yr_effect_lag + 
                             num_states_w_intervention_2yr_lag,
                           data = sensitivity_anlys_2yr_int_lag, family = "binomial")

#summary output of the model as an HTML table
stargazer(lagged_analysis_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateAlaska 0.231***
(0.028)
StateArizona 0.298***
(0.014)
StateArkansas -0.388***
(0.020)
StateCalifornia -0.164***
(0.013)
StateColorado 0.081***
(0.016)
StateConnecticut 0.190***
(0.016)
StateDelaware 0.420***
(0.022)
StateFlorida 0.273***
(0.012)
StateGeorgia -0.069***
(0.013)
StateHawaii -0.249***
(0.025)
StateIdaho -0.176***
(0.024)
StateIllinois -0.020
(0.013)
StateIndiana 0.075***
(0.014)
StateIowa -0.740***
(0.021)
StateKansas -0.330***
(0.019)
StateKentucky 0.633***
(0.014)
StateLouisiana 0.288***
(0.014)
StateMaine 0.163***
(0.022)
StateMaryland -1.062***
(0.019)
StateMassachusetts 0.223***
(0.014)
StateMichigan -0.022
(0.014)
StateMinnesota -0.630***
(0.017)
StateMississippi -0.114***
(0.018)
StateMissouri 0.197***
(0.015)
StateMontana -0.348***
(0.029)
StateNebraska -0.890***
(0.029)
StateNevada 0.437***
(0.017)
StateNew Hampshire 0.267***
(0.020)
StateNew Jersey 0.113***
(0.013)
StateNew Mexico 0.621***
(0.017)
StateNew York -0.234***
(0.013)
StateNorth Carolina 0.179***
(0.013)
StateNorth Dakota -1.063***
(0.045)
StateOhio 0.456***
(0.012)
StateOklahoma 0.368***
(0.015)
StateOregon -0.195***
(0.018)
StatePennsylvania 0.447***
(0.012)
StateRhode Island 0.254***
(0.022)
StateSouth Carolina 0.211***
(0.015)
StateSouth Dakota -0.987***
(0.043)
StateTennessee 0.443***
(0.013)
StateTexas -0.221***
(0.012)
StateUtah 0.020
(0.018)
StateVermont -0.143***
(0.031)
StateVirginia -0.107***
(0.014)
StateWashington 0.067***
(0.015)
StateWest Virginia 0.884***
(0.015)
StateWisconsin -0.034**
(0.015)
StateWyoming 0.017
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.028***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.006
(0.007)
Medical_Marijuana_Redefined 0.066***
(0.006)
Recreational_Marijuana_Redefined -0.039***
(0.009)
GSL_Redefined 0.034***
(0.006)
PDMP_Redefined -0.017***
(0.006)
Medicaid_Expansion_Redefined 0.102***
(0.006)
int_2_yr_effect_lag 0.036***
(0.005)
num_states_w_intervention_2yr_lag 0.0001
(0.001)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -10.625***
(0.030)
Observations 2,000
Adjusted R2 0.913
Log Likelihood -16,786.840
UBRE 8.876
Note: p<0.1; p<0.05; p<0.01

9.2 Plots

#convergence diagnostics and basis-dimension (k) checks for the lagged model
gam.check(lagged_analysis_model)

## 
## Method: UBRE   Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-7.076835e-07,2.344539e-05]
## (score 8.875998 & scale 1).
## Hessian positive definite, eigenvalue range [8.794682e-05,0.0005175496].
## Model rank =  95 / 95 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                k'  edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest   9.00 8.88    1.05    0.99
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.95    1.05    0.97
## s(Time_Period_ID):as.factor(Region)South     9.00 8.92    1.05    0.99
## s(Time_Period_ID):as.factor(Region)West      9.00 7.86    1.05    0.97

9.3 Compile Results

############## Sensitivity Analysis 5: Make Data Frame of Results and 95% CI ###########
#store the coefficients into the table
#(the single column is auto-named "coef.lagged_analysis_model." and is renamed just below)
sensitivity_anlys_2yr_int_lag_full_table<-data.frame(coef(lagged_analysis_model))
#check to see how the table looks
head(sensitivity_anlys_2yr_int_lag_full_table)
##                 coef.lagged_analysis_model.
## (Intercept)                    -10.62517671
## StateAlaska                      0.23148261
## StateArizona                     0.29812714
## StateArkansas                   -0.38803288
## StateCalifornia                 -0.16379686
## StateColorado                    0.08144761
#give the lone column a descriptive name
colnames(sensitivity_anlys_2yr_int_lag_full_table)<-"Coefficient_Estimate"

#the policy covariates (redefined versions) plus the two lagged intervention terms;
#rows matching these names are treated as covariates when relabelling below
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined", "Recreational_Marijuana_Redefined",
              "GSL_Redefined", "PDMP_Redefined", "Medicaid_Expansion_Redefined",
              "int_2_yr_effect_lag", "num_states_w_intervention_2yr_lag")

#rename the variable names of the regression output so that they look nicer:
#there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#(vectorized with startsWith()/substring() instead of the original element-by-element loop,
#which also used the 1:length(...) anti-pattern; the three cases are mutually exclusive,
#so applying them in sequence matches the original if/else chain exactly)
renamed<-rownames(sensitivity_anlys_2yr_int_lag_full_table)
not_covariate<-!(renamed %in% covariates)

#state effects look like "StateMassachusetts" or "StateGeorgia": strip the "State" prefix
is_state<-not_covariate & startsWith(renamed, "State")
renamed[is_state]<-substring(renamed[is_state], 6)

#the intercept corresponds to the baseline state, Alabama
renamed[not_covariate & renamed == "(Intercept)"]<-"Intercept/Alabama"

#smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1":
#replace the prefix with a readable label
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"
is_smooth<-not_covariate & startsWith(renamed, smooth_prefix)
renamed[is_smooth]<-paste("Smoothed Time for Region ",
                          substring(renamed[is_smooth], nchar(smooth_prefix) + 1),
                          sep = "")

rownames(sensitivity_anlys_2yr_int_lag_full_table)<-renamed

#build Wald 95% confidence intervals and odds ratios for every model coefficient
#cache the model summary once instead of recomputing it for each column
model_summary <- summary(lagged_analysis_model)
model_se <- model_summary$se

#normal-approximation bounds on the coefficient (log-odds) scale: estimate +/- 1.96 * SE
sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Lower_Bound <-
  sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate - 1.96 * model_se
sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Upper_Bound <-
  sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate + 1.96 * model_se

#exponentiate the estimates and CI bounds onto the odds-ratio scale
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio    <- exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate)
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio_LB <- exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio_UB <- exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Upper_Bound)

#keep the standard errors alongside the estimates
sensitivity_anlys_2yr_int_lag_full_table$Standard_Error <- model_se

#p-values exist only for the parametric terms (p.pv); the smoothed time-effect
#coefficients get NA to pad the column out to the full coefficient count
n_smooth_coefs <- length(coef(lagged_analysis_model)) - length(model_summary$p.pv)
sensitivity_anlys_2yr_int_lag_full_table$p_value <- c(model_summary$p.pv, rep(NA, n_smooth_coefs))

#sanity-check the top of the table: the intercept (Alabama baseline) followed by state fixed effects
head(sensitivity_anlys_2yr_int_lag_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama         -10.62517671            -10.68345453
## Alaska                      0.23148261              0.17754728
## Arizona                     0.29812714              0.27082681
## Arkansas                   -0.38803288             -0.42681155
## California                 -0.16379686             -0.18975475
## Colorado                    0.08144761              0.05009146
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama             -10.5668989 2.429654e-05  2.292106e-05
## Alaska                          0.2854179 1.260467e+00  1.194285e+00
## Arizona                         0.3254275 1.347333e+00  1.311048e+00
## Arkansas                       -0.3492542 6.783900e-01  6.525865e-01
## California                     -0.1378390 8.489145e-01  8.271620e-01
## Colorado                        0.1128037 1.084856e+00  1.051367e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  2.575456e-05     0.02973358  0.000000e+00
## Alaska             1.330318e+00     0.02751803  4.029563e-17
## Arizona            1.384622e+00     0.01392874 1.233090e-101
## Arkansas           7.052138e-01     0.01978504  1.210815e-85
## California         8.712390e-01     0.01324382  3.904038e-35
## Colorado           1.119412e+00     0.01599803  3.559892e-07
#and the bottom: smoothed time-effect coefficients, whose p_value is NA by construction
tail(sensitivity_anlys_2yr_int_lag_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.2622370               0.2419245
## Smoothed Time for Region West.5            0.2483856               0.2156427
## Smoothed Time for Region West.6            0.2513870               0.2094029
## Smoothed Time for Region West.7            0.3147938               0.2611352
## Smoothed Time for Region West.8            0.4464894               0.3806760
## Smoothed Time for Region West.9            0.5362440               0.4726100
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.2825495   1.299835
## Smoothed Time for Region West.5               0.2811285   1.281954
## Smoothed Time for Region West.6               0.2933712   1.285808
## Smoothed Time for Region West.7               0.3684524   1.369977
## Smoothed Time for Region West.8               0.5123027   1.562816
## Smoothed Time for Region West.9               0.5998781   1.709574
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.273698      1.326507     0.01036353
## Smoothed Time for Region West.5      1.240659      1.324624     0.01670556
## Smoothed Time for Region West.6      1.232942      1.340940     0.02142049
## Smoothed Time for Region West.7      1.298403      1.445496     0.02737684
## Smoothed Time for Region West.8      1.463273      1.669130     0.03357822
## Smoothed Time for Region West.9      1.604176      1.821897     0.03246636
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#save the table into a CSV (kept commented out so re-knitting does not overwrite the saved results)
# write.csv(round(sensitivity_anlys_2yr_int_lag_full_table,5), "./Data/coefficients_GAM_9_6_21_lagged_2yr_int.csv")

#export a table with just the covariates
#locate the rows of the full coefficient table that correspond to the policy covariates
covariate_Index <- which(rownames(sensitivity_anlys_2yr_int_lag_full_table) %in% covariates)

#covariate rows first, rounded to 5 decimal places (the remaining rows are
#appended unrounded below; the whole table is rounded again when written out)
sens_analysis_2yr_int_lag_covariate_table <- round(sensitivity_anlys_2yr_int_lag_full_table[covariate_Index, ], 5)

#give the covariate rows human-readable names
rownames(sens_analysis_2yr_int_lag_covariate_table) <- c("Naloxone_Pharmacy_Yes",
                                                         "Naloxone_Pharmacy_No",
                                                         "Medical_Marijuana",
                                                         "Recreational_Marijuana",
                                                         "GSL",
                                                         "PDMP",
                                                         "Medicaid_Expansion",
                                                         "Intervention",
                                                         "Number of States w Intervention")

#stack the covariates on top and all remaining rows (intercept, states, smooth terms) underneath
sens_analysis_2yr_int_lag_covariate_table <- rbind(sens_analysis_2yr_int_lag_covariate_table,
                                                   sensitivity_anlys_2yr_int_lag_full_table[-covariate_Index, ])

#keep only the odds-ratio-scale columns plus the p-value; drop the raw coefficient-scale columns
coefficient_scale_cols <- c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                            "Coefficient_Upper_Bound", "Standard_Error")
kept_cols <- setdiff(colnames(sens_analysis_2yr_int_lag_covariate_table), coefficient_scale_cols)
sens_analysis_2yr_int_lag_covariate_table <- sens_analysis_2yr_int_lag_covariate_table[, kept_cols]

colnames(sens_analysis_2yr_int_lag_covariate_table) <- c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
#inspect the cleaned covariate table (odds-ratio scale plus p-values)
head(sens_analysis_2yr_int_lag_covariate_table, 10)
##                                 Risk_Ratio_Estimates  RR_95_CI_LB  RR_95_CI_UB
## Naloxone_Pharmacy_Yes                   9.727500e-01 9.580600e-01 9.876700e-01
## Naloxone_Pharmacy_No                    1.005590e+00 9.928100e-01 1.018540e+00
## Medical_Marijuana                       1.068760e+00 1.056960e+00 1.080690e+00
## Recreational_Marijuana                  9.619900e-01 9.458600e-01 9.783900e-01
## GSL                                     1.034880e+00 1.023520e+00 1.046360e+00
## PDMP                                    9.827200e-01 9.716000e-01 9.939600e-01
## Medicaid_Expansion                      1.107650e+00 1.094610e+00 1.120840e+00
## Intervention                            1.036230e+00 1.026850e+00 1.045700e+00
## Number of States w Intervention         1.000120e+00 9.978200e-01 1.002420e+00
## Intercept/Alabama                       2.429654e-05 2.292106e-05 2.575456e-05
##                                 p-value
## Naloxone_Pharmacy_Yes           0.00037
## Naloxone_Pharmacy_No            0.39295
## Medical_Marijuana               0.00000
## Recreational_Marijuana          0.00001
## GSL                             0.00000
## PDMP                            0.00267
## Medicaid_Expansion              0.00000
## Intervention                    0.00000
## Number of States w Intervention 0.92042
## Intercept/Alabama               0.00000
#save the table into a CSV (kept commented out so re-knitting does not overwrite the saved results)
# write.csv(round(sens_analysis_2yr_int_lag_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_2_yr_int_lag.csv")

9.4 Attributable Deaths

################# Sensitivity Analysis 5: Number of Attributable Deaths #################
#find the number of deaths attributable to the intervention
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
#(rows where the lagged 2-year intervention exposure variable is positive)
attr_deaths_anlys_2yr_int_lag<-sensitivity_anlys_2yr_int_lag[which(sensitivity_anlys_2yr_int_lag$int_2_yr_effect_lag>0),]

#compute the probability of overdose had intervention not occurred:
#take the observed death rate on the logit scale and subtract the intervention
#coefficient, i.e. remove the estimated intervention effect, then map back with expit
prob_od_no_int_2yr_int_lag<-expit(-coef(lagged_analysis_model)["int_2_yr_effect_lag"]
                                  + logit(attr_deaths_anlys_2yr_int_lag$imputed_deaths/attr_deaths_anlys_2yr_int_lag$population))

#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
coef_lb<-coef(lagged_analysis_model) - 1.96*summary(lagged_analysis_model)$se
coef_ub<-coef(lagged_analysis_model) + 1.96*summary(lagged_analysis_model)$se

#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable
#NOTE: because the coefficient enters negated inside expit, the coefficient's LOWER bound
#produces the LARGER counterfactual probability. The _LB/_UB suffixes therefore refer to the
#bounds on the resulting attributable-death counts (observed - counterfactual), not to the
#probability itself; the printed totals below (LB 13275 < UB 22185) confirm the orientation.
prob_od_no_int_LB_2yr_int_lag<-expit(-coef_lb[names(coef_lb) == "int_2_yr_effect_lag"]
                                     + logit(attr_deaths_anlys_2yr_int_lag$imputed_deaths/attr_deaths_anlys_2yr_int_lag$population))

prob_od_no_int_UB_2yr_int_lag<-expit(-coef_ub[names(coef_ub) == "int_2_yr_effect_lag"]
                                     + logit(attr_deaths_anlys_2yr_int_lag$imputed_deaths/attr_deaths_anlys_2yr_int_lag$population))


#estimate the number of deaths attributable to the intervention
#first, initialize (preallocate) the vectors to store the numbers, one slot per unique time period
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_ID)))


#for each time period, find the rows belonging to that time point and total up the
#deaths attributable to the intervention; position idx in the preallocated output
#vectors corresponds to the time periods in ascending order
ordered_time_points <- sort(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_ID))

for (idx in seq_along(ordered_time_points)) {
  #rows of the analysis data belonging to this time period
  rows <- which(attr_deaths_anlys_2yr_int_lag$Time_Period_ID == ordered_time_points[idx])

  observed_deaths <- attr_deaths_anlys_2yr_int_lag$imputed_deaths[rows]
  state_pop <- attr_deaths_anlys_2yr_int_lag$population[rows]

  #attributable deaths = observed deaths with intervention
  #                      - estimated deaths had the intervention not occurred
  num_attr_od[idx] <- sum(observed_deaths - prob_od_no_int_2yr_int_lag[rows] * state_pop)

  #bounds of the estimate, using the CI bounds of the counterfactual probability
  num_attr_od_LB[idx] <- sum(observed_deaths - prob_od_no_int_LB_2yr_int_lag[rows] * state_pop)
  num_attr_od_UB[idx] <- sum(observed_deaths - prob_od_no_int_UB_2yr_int_lag[rows] * state_pop)
}

#assemble the per-time-period attributable deaths with their 95% CI bounds
# NOTE(review): pairing sort(unique(Time_Period_ID)) with an independently
# sorted sort(unique(Time_Period_Start)) assumes start dates increase
# monotonically with period ID -- confirm against the source data
num_attr_od_2yr_int_lag<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_ID)),
                                    "Time_Start" = sort(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_Start)),
                                    "Num_Attr_Deaths" = num_attr_od,
                                    "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                    "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths)
## [1] 17750.57
summary(num_attr_od_2yr_int_lag$Num_Attr_Deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.57  165.43  388.60  455.14  652.72 1103.01
#convert to Date class so year() and the date axis below work
num_attr_od_2yr_int_lag$Time_Start<-as.Date(num_attr_od_2yr_int_lag$Time_Start)

#compute the 95% CI for the total
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths_LB)
## [1] 13275.34
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths_UB)
## [1] 22185.3
#sum up the number of excess deaths per year (aggregate time periods by calendar year)
yearly_num_Attr_Deaths_2yr_int_lag<-num_attr_od_2yr_int_lag %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

summary(yearly_num_Attr_Deaths_2yr_int_lag$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.57  305.10  744.84  887.53 1260.68 2101.84
summary(yearly_num_Attr_Deaths_2yr_int_lag$death_lb)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    8.655  228.178  557.050  663.767  942.841 1571.928
summary(yearly_num_Attr_Deaths_2yr_int_lag$death_ub)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.46  381.32  930.92 1109.27 1575.64 2626.95
#quick diagnostic plot of the yearly attributable-death estimates
ggplot(yearly_num_Attr_Deaths_2yr_int_lag, aes(x = year, y = deaths)) + geom_line() + geom_point()

10 Compile Attributable Deaths Results

########################## Compiled Attributable Deaths Plot###############################
#add color column to the datasets
#gp_type tags each series; its alphabetical order ("a".."d") is what the manual
#color/shape scales and legend labels at the bottom are matched against:
#  a = main analysis, b = all overdose deaths,
#  c = excluding high-missingness states, d = redefined (2-year effect) intervention
yearly_num_Attr_Deaths_main_analysis <- yearly_num_Attr_Deaths_main_analysis %>% 
  mutate(gp_type = "a")
yearly_num_Attr_Deaths_exclude_states <- yearly_num_Attr_Deaths_exclude_states %>%
  mutate(gp_type = "c")
yearly_num_Attr_Deaths_od_all <- yearly_num_Attr_Deaths_od_all %>%
  mutate(gp_type = "b")
yearly_num_Attr_Deaths_redefine_int <- yearly_num_Attr_Deaths_redefine_int %>%
  mutate(gp_type = "d")
#each of the four datasets contributes four layers: a solid estimate line, point
#markers, and two dashed lines for the 95% CI bounds; the commented-out
#geom_point layers for the CI bounds are intentionally retained but disabled
# pdf("Figures/num_attr_deaths_yearly_for_all_anlys_5_12_21_all_od_shape_alpha.pdf")
ggplot(yearly_num_Attr_Deaths_main_analysis) +
  geom_line(aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1, color = gp_type,
                linetype = "Estimate")) +
  geom_point(aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1, color = gp_type, shape = gp_type))  +
  geom_line(yearly_num_Attr_Deaths_main_analysis,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval")) +
  geom_line(yearly_num_Attr_Deaths_main_analysis,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
                          color =gp_type,linetype = "95% Confidence Interval")) +
  # geom_point(yearly_num_Attr_Deaths_main_analysis,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
  #                         color = "a")) +
  # geom_point(yearly_num_Attr_Deaths_main_analysis,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
  #                         color ="a")) +
  geom_line(yearly_num_Attr_Deaths_redefine_int,
            mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                          color = gp_type, linetype = "Estimate")) +
  geom_point(yearly_num_Attr_Deaths_redefine_int,
             mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                           color = gp_type, shape = gp_type))  +
  geom_line(yearly_num_Attr_Deaths_redefine_int,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval")) +
  geom_line(yearly_num_Attr_Deaths_redefine_int,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval")) +
  # geom_point(yearly_num_Attr_Deaths_redefine_int,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
  #                         color = "d")) +
  # geom_point(yearly_num_Attr_Deaths_redefine_int,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
  #                         color = "d")) +
  geom_line(yearly_num_Attr_Deaths_od_all, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                                                         color = gp_type, linetype = "Estimate"), alpha = 1) +
  geom_point(yearly_num_Attr_Deaths_od_all, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                                                          color = gp_type, shape = gp_type), alpha = 1)  +
  geom_line(yearly_num_Attr_Deaths_od_all,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval"), alpha = 1) +
  geom_line(yearly_num_Attr_Deaths_od_all,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval"),  alpha = 1) +
  # geom_point(yearly_num_Attr_Deaths_od_all,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
  #                         color = "b"), alpha = 0.5) +
  # geom_point(yearly_num_Attr_Deaths_od_all,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
  #                         color = "b") , alpha = 0.5) +
  geom_line(yearly_num_Attr_Deaths_exclude_states,
            mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                          color = gp_type, linetype = "Estimate"), alpha = 1) +
  geom_point(yearly_num_Attr_Deaths_exclude_states, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                                                                  color = gp_type, shape = gp_type), alpha = 1)  +
  geom_line(yearly_num_Attr_Deaths_exclude_states,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval"),alpha = 1) +
  geom_line(yearly_num_Attr_Deaths_exclude_states,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval"), alpha = 1) +
    # geom_point(yearly_num_Attr_Deaths_exclude_states,
    #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
    #                         color = "c"),alpha = 0.5) +
    # geom_point(yearly_num_Attr_Deaths_exclude_states,
    #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
    #                         color = "c"), alpha = 0.5) +
 #publication theme: Times fonts, no grid, legend placed inside the panel
 theme(axis.text.y=element_text(size=10, family = "Times"),
        axis.title=element_text(size=10,face="bold", family = "Times"),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(size = 10, family = "Times"),
        panel.background = element_rect("white"),
        legend.text=element_text(size=9, family = "Times"),
        legend.position = c(.4,.85),
       # legend.position = "bottom",
        legend.box="vertical", legend.margin=margin()) +
  guides(color=guide_legend(nrow=2,byrow=TRUE)) +
  labs(x = "Year", y = "Yearly Drug Overdose Deaths Attributable to DIH Prosecutions Reported in Media", color = "",
       linetype = "", shape = "") +
  #manual scales: values/labels are matched to gp_type levels "a".."d" in order
  scale_color_manual(values=c('black', 'red', 'green', 'deepskyblue'),
                     labels = c("Main Analysis: Unintentional Drug Overdose Deaths", "All Drug Overdose Deaths",
                                "Excluding States with At Least 75% Missing Monthly", "2 Year Effect")) +
  scale_x_date(date_labels="%Y", breaks = seq(as.Date("2000-01-01"), as.Date("2018-01-01"), by = "2 years")) +
  scale_linetype_manual(values = c("Estimate" = "solid", "95% Confidence Interval" = "dashed"),
                        breaks = c("Estimate", "95% Confidence Interval")) +
  #this second guides() call merges with the color guide above rather than replacing it
  guides(linetype = guide_legend(nrow = 1)) + 
  scale_shape_manual(values = c(16, 4, 5, 2), 
                     labels = c("Main Analysis: Unintentional Drug Overdose Deaths", "All Drug Overdose Deaths",
                                "Excluding States with At Least 75% Missing Monthly", "2 Year Effect"))

# dev.off()